    def test_table_exists_function(self):

        from fundamentals.mysql import writequery
        sqlQuery = "CREATE TABLE `testing_table` (`id` INT NOT NULL, PRIMARY KEY (`id`))"
        writequery(
            log=log,
            sqlQuery=sqlQuery,
            dbConn=dbConn,
            Force=False,
            manyValueList=False
        )

        from fundamentals.mysql import table_exists
        tableName = "testing_table"
        this = table_exists(
            dbConn=dbConn,
            log=log,
            dbTableName=tableName
        )
        print("%(tableName)s exists: %(this)s" % locals())

        from fundamentals.mysql import writequery
        sqlQuery = "DROP TABLE `testing_table`;"
        writequery(
            log=log,
            sqlQuery=sqlQuery,
            dbConn=dbConn,
            Force=False,
            manyValueList=False
        )
    def test_writequery_function(self):

        from fundamentals.mysql import writequery
        sqlQuery = "CREATE TABLE `testing_table` (`id` INT NOT NULL, PRIMARY KEY (`id`))"
        writequery(
            log=log,
            sqlQuery=sqlQuery,
            dbConn=dbConn,
            Force=False,
            manyValueList=False
        )
    def test_writequery_function_delete(self):

        from fundamentals.mysql import writequery
        sqlQuery = "DROP TABLE `testing_table`;"
        writequery(
            log=log,
            sqlQuery=sqlQuery,
            dbConn=dbConn,
            Force=False,
            manyValueList=False
        )
    def test_writequery_error_force(self):

        from fundamentals.mysql import writequery
        sqlQuery = "rubbish query;"
        writequery(
            log=log,
            sqlQuery=sqlQuery,
            dbConn=dbConn,
            Force=True,
            manyValueList=False
        )
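    def test_writequery_error_no_force(self):
        # A hedged companion sketch (not part of the original suite): assuming
        # writequery raises an exception for an invalid query when Force=False,
        # this catches the error rather than suppressing it with Force=True.
        from fundamentals.mysql import writequery
        sqlQuery = "rubbish query;"
        try:
            writequery(
                log=log,
                sqlQuery=sqlQuery,
                dbConn=dbConn,
                Force=False,
                manyValueList=False
            )
        except Exception as e:
            print("writequery raised as expected: %s" % (e,))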
    def test_manyvalue_insert(self):
        from fundamentals.mysql import writequery
        sqlQuery = "CREATE TABLE `testing_table` (`id` INT NOT NULL, PRIMARY KEY (`id`))"
        writequery(
            log=log,
            sqlQuery=sqlQuery,
            dbConn=dbConn,
            Force=False,
            manyValueList=False
        )

        from fundamentals.mysql import writequery
        sqlQuery = """INSERT INTO testing_table (id) values (%s)"""
        writequery(
            log=log,
            sqlQuery=sqlQuery,
            dbConn=dbConn,
            Force=False,
            manyValueList=[(1,), (2,), (3,), (4,), (5,), (6,), (7,),
                           (8,), (9,), (10,), (11,), (12,), ]
        )
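
        # A minimal follow-up sketch (not part of the original test): read the
        # rows back with readquery, then drop the table to mirror the clean-up
        # pattern used by the tests above.
        from fundamentals.mysql import readquery
        rows = readquery(
            log=log,
            sqlQuery="SELECT id FROM testing_table ORDER BY id",
            dbConn=dbConn,
            quiet=False
        )
        print("%s rows inserted via manyValueList" % (len(rows),))

        sqlQuery = "DROP TABLE `testing_table`;"
        writequery(
            log=log,
            sqlQuery=sqlQuery,
            dbConn=dbConn,
            Force=False,
            manyValueList=False
        )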
Example #6
    def _update_database_for_sent_item(
            self,
            primaryId,
            success):
        """*update the database to indicate that the PDFs have been sent to kindle(s)*


        **Key Arguments:**
            - ``primaryId`` -- unique ID of database entry to update
            - ``success`` -- success message/number

        **Return:**
            - None
        """
        self.log.info(
            'starting the ``_update_database_for_sent_item`` method')

        if success == True:
            sqlQuery = u"""
                update `reading-list` set sentToKindle = 1 where primaryId = %(primaryId)s
            """ % locals()
            writequery(
                log=self.log,
                sqlQuery=sqlQuery,
                dbConn=self.dbConn
            )
        elif success == 404:
            sqlQuery = u"""
                update `reading-list` set sentToKindle = -1 where primaryId = %(primaryId)s
            """ % locals()
            writequery(
                log=self.log,
                sqlQuery=sqlQuery,
                dbConn=self.dbConn
            )

        self.log.info(
            'completed the ``_update_database_for_sent_item`` method')
        return None
    def update_gravity_event_annotations(
            self):
        """*update gravity event annotations*

        **Key Arguments:**
            # -

        **Return:**
            - None

        **Usage:**
            ..  todo::

                - add usage info
                - create a sublime snippet for usage
                - write a command-line tool for this method
                - update package tutorial with command-line tool info if needed

            .. code-block:: python

                usage code

        """
        self.log.debug(
            'starting the ``update_gravity_event_annotations`` method')

        from breaker.transients import annotator

        # CREATE THE ANNOTATION HELPER TABLES IF THEY DON'T EXIST
        moduleDirectory = os.path.dirname(__file__)
        mysql_scripts = moduleDirectory + "/resources/mysql"
        for db in ["ps1gw", "ps13pi", "atlas"]:
            directory_script_runner(
                log=self.log,
                pathToScriptDirectory=mysql_scripts,
                databaseName=self.settings["database settings"][db]["db"],
                loginPath=self.settings["database settings"][db]["loginPath"],
                waitForResult=True,
                successRule=False,
                failureRule=False
            )
        for db in ["ligo_virgo_waves"]:
            directory_script_runner(
                log=self.log,
                pathToScriptDirectory=mysql_scripts + "/ps1_skycell_help_tables",
                databaseName=self.settings["database settings"][db]["db"],
                loginPath=self.settings["database settings"][db]["loginPath"],
                waitForResult=True,
                successRule=False,
                failureRule=False
            )

        # UPDATE THE TABLE WITH THE METADATA OF EACH GRAVITY EVENT
        sqlQuery = ""
        for g in self.settings["gravitational waves"]:
            h = self.settings["gravitational waves"][g]["human-name"]
            m = self.settings["gravitational waves"][g]["mjd"]
            cmd = """insert ignore into tcs_gravity_events (`gracedb_id`, `gravity_event_id`, `mjd`) VALUES ("%(g)s", "%(h)s", %(m)s) on duplicate key update mjd=%(m)s;\n""" % locals(
            )
            sqlQuery += cmd
        for db in [self.atlasDbConn, self.ps1gwDbConn, self.ps13piDbConn]:
            writequery(
                log=self.log,
                sqlQuery=sqlQuery,
                dbConn=db
            )
        sqlQuery = sqlQuery.replace("tcs_gravity_events", "gravity_events")
        writequery(
            log=self.log,
            sqlQuery=sqlQuery,
            dbConn=self.ligo_virgo_wavesDbConn,
        )
        for db in ["ps1gw", "ps13pi", "atlas"]:
            directory_script_runner(
                log=self.log,
                pathToScriptDirectory=mysql_scripts,
                databaseName=self.settings["database settings"][db]["db"],
                loginPath=self.settings["database settings"][db]["loginPath"],
                waitForResult=True,
                successRule=False,
                failureRule=False
            )
        for db in ["ligo_virgo_waves"]:
            directory_script_runner(
                log=self.log,
                pathToScriptDirectory=mysql_scripts + "/ps1_skycell_help_tables",
                databaseName=self.settings["database settings"][db]["db"],
                loginPath=self.settings["database settings"][db]["loginPath"],
                waitForResult=True,
                successRule=False,
                failureRule=False
            )

        dbDict = {
            "ps1gw": self.ps1gwDbConn,
            "atlas": self.atlasDbConn,
            "ps13pi": self.ps13piDbConn,
            "ligo_virgo_waves": self.ligo_virgo_wavesDbConn
        }

        for db in dbDict.keys():

            for g in self.settings["gravitational waves"]:
                h = self.settings["gravitational waves"][g]["human-name"]
                print("Annotating new transients associated with gravity event %(h)s" % locals())
                m = self.settings["gravitational waves"][g]["mjd"]
                mapPath = self.settings["gravitational waves"][g]["mapPath"]
                mapName = os.path.basename(mapPath)

                thisDbConn = dbDict[db]

                if thisDbConn in [self.ps1gwDbConn, self.ps13piDbConn]:

                    sqlQuery = u"""
                        SELECT
                            a.transient_object_id, a.gracedb_id, t.ra_psf, t.dec_psf
                        FROM
                            tcs_transient_objects t,
                            tcs_gravity_event_annotations a
                        WHERE
                            a.transient_object_id = t.id
                                AND t.detection_list_id != 0
                                AND (a.map_name !=
                                     "%(mapName)s"  or a.map_name is null)
                                AND a.gracedb_id="%(g)s";
                    """ % locals()

                    rows = readquery(
                        log=self.log,
                        sqlQuery=sqlQuery,
                        dbConn=thisDbConn,
                        quiet=False
                    )

                    transients = {}
                    for r in rows:
                        transients[r["transient_object_id"]] = (
                            r["ra_psf"], r["dec_psf"])

                    an = annotator(
                        log=self.log,
                        settings=self.settings,
                        gwid=g
                    )
                    transientNames, probs = an.annotate(transients)

                if thisDbConn in [self.atlasDbConn]:
                    sqlQuery = u"""
                        SELECT
                            a.transient_object_id, a.gracedb_id, t.ra, t.dec
                        FROM
                            atlas_diff_objects t,
                            tcs_gravity_event_annotations a
                        WHERE
                            a.transient_object_id = t.id
                                AND t.detection_list_id != 0
                                AND (a.map_name !=
                                     "%(mapName)s"  or a.map_name is null)
                                AND a.gracedb_id="%(g)s";
                    """ % locals()
                    rows = readquery(
                        log=self.log,
                        sqlQuery=sqlQuery,
                        dbConn=thisDbConn,
                        quiet=False
                    )

                    transients = {}
                    for r in rows:
                        transients[r["transient_object_id"]] = (
                            r["ra"], r["dec"])

                    an = annotator(
                        log=self.log,
                        settings=self.settings,
                        gwid=g
                    )
                    transientNames, probs = an.annotate(transients)

                if thisDbConn in [self.ligo_virgo_wavesDbConn]:

                    # PANSTARRS SKYCELLS
                    sqlQuery = u"""
                        SELECT 
                                a.skycell_id, a.gracedb_id, t.raDeg, t.decDeg
                            FROM
                                ps1_skycell_map t,
                                ps1_skycell_gravity_event_annotations a
                            WHERE
                                a.skycell_id = t.skycell_id
                                AND (a.map_name != "%(mapName)s"  or a.map_name is null)
                                AND a.gracedb_id="%(g)s"; 
                    """ % locals()
                    rows = readquery(
                        log=self.log,
                        sqlQuery=sqlQuery,
                        dbConn=thisDbConn,
                        quiet=False
                    )

                    exposures = {}
                    for r in rows:
                        exposures[r["skycell_id"]] = (
                            r["raDeg"], r["decDeg"])

                    stats = survey_footprint(
                        log=self.log,
                        settings=self.settings,
                        gwid=g
                    )
                    exposureIDs, probs = stats.annotate_exposures(
                        exposures=exposures,
                        pointingSide=0.4
                    )

                    dataList = []
                    for p, t in zip(probs, exposureIDs):
                        dataList.append({
                            "skycell_id": t,
                            "prob_coverage": p,
                            "gracedb_id": g,
                            "map_name": mapName
                        })
                    tableName = "ps1_skycell_gravity_event_annotations"

                    dataSet = list_of_dictionaries(
                        log=self.log,
                        listOfDictionaries=dataList,
                        reDatetime=re.compile('^[0-9]{4}-[0-9]{2}-[0-9]{2}T')
                    )
                    # RECURSIVELY CREATE MISSING DIRECTORIES
                    if not os.path.exists("/tmp/mysqlinsert/%(db)s" % locals()):
                        os.makedirs("/tmp/mysqlinsert/%(db)s" % locals())
                    now = datetime.now()
                    now = now.strftime("%Y%m%dt%H%M%S%f")
                    mysqlData = dataSet.mysql(
                        tableName=tableName, filepath="/tmp/mysqlinsert/%(db)s/%(now)s.sql" % locals(), createStatement=False)

                    # ATLAS EXPOSURES
                    sqlQuery = u"""
                        SELECT 
                                atlas_object_id, gracedb_id, raDeg, decDeg
                            FROM
                                atlas_exposure_gravity_event_annotations
                            WHERE
                                (map_name != "%(mapName)s"  or map_name is null)
                                AND gracedb_id="%(g)s"; 
                    """ % locals()
                    rows = readquery(
                        log=self.log,
                        sqlQuery=sqlQuery,
                        dbConn=thisDbConn,
                        quiet=False
                    )

                    exposures = {}
                    for r in rows:
                        exposures[r["atlas_object_id"]] = (
                            r["raDeg"], r["decDeg"])

                    stats = survey_footprint(
                        log=self.log,
                        settings=self.settings,
                        gwid=g
                    )
                    exposureIDs, probs = stats.annotate_exposures(
                        exposures=exposures,
                        pointingSide=5.46
                    )

                    dataList = []
                    for p, t in zip(probs, exposureIDs):
                        dataList.append({
                            "atlas_object_id": t,
                            "prob_coverage": p,
                            "gracedb_id": g,
                            "map_name": mapName
                        })
                    tableName = "atlas_exposure_gravity_event_annotations"

                    dataSet = list_of_dictionaries(
                        log=self.log,
                        listOfDictionaries=dataList,
                        reDatetime=re.compile('^[0-9]{4}-[0-9]{2}-[0-9]{2}T')
                    )
                    # RECURSIVELY CREATE MISSING DIRECTORIES
                    if not os.path.exists("/tmp/mysqlinsert/%(db)s" % locals()):
                        os.makedirs("/tmp/mysqlinsert/%(db)s" % locals())
                    now = datetime.now()
                    now = now.strftime("%Y%m%dt%H%M%S%f")
                    mysqlData = dataSet.mysql(
                        tableName=tableName, filepath="/tmp/mysqlinsert/%(db)s/%(now)s.sql" % locals(), createStatement=False)

                if thisDbConn not in [self.ligo_virgo_wavesDbConn]:
                    dataList = []
                    for p, t in zip(probs, transientNames):
                        dataList.append({
                            "transient_object_id": t,
                            "enclosing_contour": p,
                            "gracedb_id": g,
                            "map_name": mapName
                        })
                    tableName = "tcs_gravity_event_annotations"

                    dataSet = list_of_dictionaries(
                        log=self.log,
                        listOfDictionaries=dataList,
                        reDatetime=re.compile('^[0-9]{4}-[0-9]{2}-[0-9]{2}T')
                    )
                    # RECURSIVELY CREATE MISSING DIRECTORIES
                    if not os.path.exists("/tmp/mysqlinsert/%(db)s" % locals()):
                        os.makedirs("/tmp/mysqlinsert/%(db)s" % locals())
                    now = datetime.now()
                    now = now.strftime("%Y%m%dt%H%M%S%f")
                    mysqlData = dataSet.mysql(
                        tableName=tableName, filepath="/tmp/mysqlinsert/%(db)s/%(now)s.sql" % locals(), createStatement=False)

        for db in dbDict.keys():
            directory_script_runner(
                log=self.log,
                pathToScriptDirectory="/tmp/mysqlinsert/%(db)s" % locals(),
                databaseName=self.settings["database settings"][db]["db"],
                loginPath=self.settings["database settings"][db]["loginPath"],
                waitForResult=True,
                successRule=False,
                failureRule=False
            )

        self.log.debug(
            'completed the ``update_gravity_event_annotations`` method')
        return None
Example #8
    def _do_ned_namesearch_queries_and_add_resulting_metadata_to_database(
            self, batchCount):
        """*Query NED via name search and add result metadata to database*

        **Key Arguments**

        - ``batchCount`` -- the index number of the batch sent to NED (only needed for printing to STDOUT to give the user an idea of progress)
        

        *Usage:*

            ```python
            numberSources = stream._do_ned_namesearch_queries_and_add_resulting_metadata_to_database(batchCount=10)
            ```
        """
        self.log.debug(
            'starting the ``_do_ned_namesearch_queries_and_add_resulting_metadata_to_database`` method'
        )

        # ASTROCALC UNIT CONVERTER OBJECT
        converter = unit_conversion(log=self.log)
        tableName = self.dbTableName

        # QUERY NED WITH BATCH
        totalCount = len(self.theseIds)
        print(
            "requesting metadata from NED for %(totalCount)s galaxies (batch %(batchCount)s)"
            % locals())
        # QUERY THE ONLINE NED DATABASE USING NEDDY'S NAMESEARCH METHOD
        search = namesearch(log=self.log, names=self.theseIds, quiet=True)
        results = search.get()
        print("results returned from ned -- starting to add to database")

        # CLEAN THE RETURNED DATA AND UPDATE DATABASE
        totalCount = len(results)
        count = 0
        sqlQuery = ""
        dictList = []
        for thisDict in results:
            thisDict["tableName"] = tableName
            count += 1
            for k, v in list(thisDict.items()):
                if not v or len(v) == 0:
                    thisDict[k] = "null"
                if k in ["major_diameter_arcmin", "minor_diameter_arcmin"
                         ] and (":" in v or "?" in v or "<" in v):
                    thisDict[k] = v.replace(":",
                                            "").replace("?",
                                                        "").replace("<", "")
                if isinstance(v, ("".__class__, u"".__class__)) and '"' in v:
                    thisDict[k] = v.replace('"', '\\"')
            if "Input name not" not in thisDict[
                    "input_note"] and "Same object as" not in thisDict[
                        "input_note"]:
                try:
                    thisDict["raDeg"] = converter.ra_sexegesimal_to_decimal(
                        ra=thisDict["ra"])
                    thisDict["decDeg"] = converter.dec_sexegesimal_to_decimal(
                        dec=thisDict["dec"])
                except:
                    name = thisDict["input_name"]
                    self.log.warning(
                        "Could not convert the RA & DEC for the %(name)s NED source"
                        % locals())
                    continue
                thisDict["eb_v"] = thisDict["eb-v"]
                thisDict["ned_name"] = thisDict["input_name"]
                row = {}
                for k in [
                        "redshift_quality", "redshift", "hierarchy",
                        "object_type", "major_diameter_arcmin", "morphology",
                        "magnitude_filter", "ned_notes", "eb_v", "raDeg",
                        "radio_morphology", "activity_type",
                        "minor_diameter_arcmin", "decDeg", "redshift_err",
                        "ned_name"
                ]:
                    if thisDict[k] == "null":
                        row[k] = None
                    else:
                        row[k] = thisDict[k]

                dictList.append(row)

        self.add_data_to_database_table(
            dictList=dictList, createStatement="""SET SESSION sql_mode="";""")

        theseIds = ("\", \"").join(self.theseIds)

        sqlQuery = u"""
            update %(tableName)s set download_error = 1 where ned_name in ("%(theseIds)s");
        """ % locals()
        writequery(
            log=self.log,
            sqlQuery=sqlQuery,
            dbConn=self.cataloguesDbConn,
        )

        print(
            "%(count)s/%(totalCount)s galaxy metadata batch entries added to database"
            % locals())
        if count < totalCount:
            # Cursor up one line and clear line
            sys.stdout.write("\x1b[1A\x1b[2K")

        sqlQuery = u"""
            update tcs_helper_catalogue_tables_info set last_updated = now() where table_name = "%(tableName)s"
        """ % locals()
        writequery(
            log=self.log,
            sqlQuery=sqlQuery,
            dbConn=self.cataloguesDbConn,
        )

        self.log.debug(
            'completed the ``_do_ned_namesearch_queries_and_add_resulting_metadata_to_database`` method'
        )
        return None
Example #9
def _add_dictlist_to_database_via_load_in_file(masterListIndex, log,
                                               dbTablename, dbSettings):
    """*load a list of dictionaries into a database table with load data infile*

    **Key Arguments:**

        - ``masterListIndex`` -- the index of the sharedList of dictionary lists to process
        - ``dbTablename`` -- the name of the database table to add the list to
        - ``dbSettings`` -- the dictionary of database settings
        - ``log`` -- logger

    **Return:**
        - None

    **Usage:**
        .. todo::

            add usage info
            create a sublime snippet for usage

        .. code-block:: python

            usage code
    """
    log.info(
        'starting the ``_add_dictlist_to_database_via_load_in_file`` function')

    global sharedList

    dictList = sharedList[masterListIndex][0]

    count = sharedList[masterListIndex][1]
    if count > totalCount:
        count = totalCount
    ltotalCount = totalCount

    # SETUP ALL DATABASE CONNECTIONS
    dbConn = database(log=log, dbSettings=dbSettings).connect()

    now = datetime.now()
    tmpTable = now.strftime("tmp_%Y%m%dt%H%M%S%f")

    # CREATE A TEMPORARY TABLE TO ADD DATA TO
    sqlQuery = """CREATE TEMPORARY TABLE %(tmpTable)s SELECT * FROM %(dbTablename)s WHERE 1=0;""" % locals(
    )
    writequery(log=log, sqlQuery=sqlQuery, dbConn=dbConn)

    csvColumns = [k for d in dictList for k in d.keys()]
    csvColumns = list(set(csvColumns))
    csvColumnsString = (', ').join(csvColumns)

    df = pd.DataFrame(dictList)
    df.replace(['nan', 'None', '', 'NaN', np.nan], '\\N', inplace=True)
    df.to_csv('/tmp/%(tmpTable)s' % locals(),
              sep="|",
              index=False,
              escapechar="\\",
              quotechar='"',
              columns=csvColumns)

    sqlQuery = """LOAD DATA LOCAL INFILE '/tmp/%(tmpTable)s'
INTO TABLE %(tmpTable)s
FIELDS TERMINATED BY '|' OPTIONALLY ENCLOSED BY '"'
IGNORE 1 LINES
(%(csvColumnsString)s);""" % locals()

    writequery(log=log, sqlQuery=sqlQuery, dbConn=dbConn)

    updateStatement = ""
    for i in csvColumns:
        updateStatement += "`%(i)s` = VALUES(`%(i)s`), " % locals()
    updateStatement += "dateLastModified = NOW(), updated = 1"

    sqlQuery = """
INSERT IGNORE INTO %(dbTablename)s
SELECT * FROM %(tmpTable)s
ON DUPLICATE KEY UPDATE %(updateStatement)s;""" % locals()
    writequery(log=log, sqlQuery=sqlQuery, dbConn=dbConn)

    sqlQuery = """DROP TEMPORARY TABLE %(tmpTable)s;""" % locals()
    writequery(log=log, sqlQuery=sqlQuery, dbConn=dbConn)

    try:
        os.remove('/tmp/%(tmpTable)s' % locals())
    except:
        pass

    log.info(
        'completed the ``_add_dictlist_to_database_via_load_in_file`` function'
    )
    return None
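

# A hedged usage sketch for the function above. Assumptions (illustrative,
# not from the original source): ``log`` and ``dbSettings`` already exist,
# the target table ``my_example_table`` exists in the database, and the
# module-level ``sharedList``/``totalCount`` globals the function reads are
# populated before the call.
sharedList = [
    ([{"id": 1, "value": "a"}, {"id": 2, "value": "b"}], 2)
]
totalCount = 2
_add_dictlist_to_database_via_load_in_file(
    masterListIndex=0,
    log=log,
    dbTablename="my_example_table",
    dbSettings=dbSettings
)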
    def populate_ps1_subdisk_table(
            self):
        """
        *Calculate 49 subdisks for each of the PS1 pointings (used to query NED in manageable sized batches) and add them to the ``ps1_pointings_subdisks`` table of the database*

        .. image:: http://i.imgur.com/y3G0aax.png
            :width: 600 px

        **Return:**
            - None

         **Usage:**

            .. code-block:: python

                # SPLIT PS1 POINTINGS INTO SUB-DISKS AND ADD TO LV DATABASE
                from breaker import update_ps1_atlas_footprint_tables
                dbUpdater = update_ps1_atlas_footprint_tables(
                    log=log,
                    settings=settings
                )
                dbUpdater.populate_ps1_subdisk_table()
        """
        self.log.debug(
            'starting the ``populate_ps1_subdisk_table`` method')

        # SELECT THE PS1 POINTINGS NEEDING SUBDISKS CALCULATED
        sqlQuery = u"""
            select ps1_exp_id, raDeg, decDeg from ps1_pointings where subdisks_calculated = 0 and raDeg is not null
        """ % locals()

        rows = readquery(
            log=self.log,
            sqlQuery=sqlQuery,
            dbConn=self.ligo_virgo_wavesDbConn,
            quiet=False
        )
        ps1PointNum = len(rows)

        # CALCULATE ALL OF THE SUBDISKS
        inserts = []
        expIds = []
        for row in rows:
            subDiskCoordinates = self._get_subdisk_parameters(
                row["raDeg"], row["decDeg"], 1.5)
            ps1_exp_id = row["ps1_exp_id"]
            expIds.append(ps1_exp_id)
            for i, c in enumerate(subDiskCoordinates):
                insert = {
                    "raDeg": c[0],
                    "decDeg": c[1],
                    "ps1_exp_id": ps1_exp_id,
                    "circleId": i + 1
                }
                inserts.append(insert)

        # ADD SUBDISKS TO DATABASE
        if len(inserts):

            insert_list_of_dictionaries_into_database_tables(
                dbConn=self.ligo_virgo_wavesDbConn,
                log=self.log,
                dictList=inserts,
                dbTableName="ps1_pointings_subdisks",
                uniqueKeyList=["ps1_exp_id", "circleId"],
                dateModified=False,
                batchSize=2500,
                replace=True
            )

            # UPDATE POINTINGS TABLE TO INDICATE SUBDISKS HAVE BEEN CALCULATED
            theseIds = ",".join(expIds)
            sqlQuery = u"""
                update ps1_pointings set subdisks_calculated = 1 where ps1_exp_id in (%(theseIds)s)
            """ % locals()
            writequery(
                log=self.log,
                sqlQuery=sqlQuery,
                dbConn=self.ligo_virgo_wavesDbConn,
            )

        if ps1PointNum == 0:
            print("All PS1 pointings have been split into their 49 sub-disks")
        else:
            print("%(ps1PointNum)s new PS1 pointings have been split into 49 sub-disks - parameters added to the `ps1_pointings_subdisks` database table" % locals())

        # APPEND HTMIDs TO THE ps1_pointings_subdisks TABLE
        add_htm_ids_to_mysql_database_table(
            raColName="raDeg",
            declColName="decDeg",
            tableName="ps1_pointings_subdisks",
            dbConn=self.ligo_virgo_wavesDbConn,
            log=self.log,
            primaryIdColumnName="primaryId"
        )

        self.log.debug(
            'completed the ``populate_ps1_subdisk_table`` method')
        return None
def generate_atlas_lightcurves(dbConn, log, settings):
    """generate all atlas FP lightcurves (clipped and stacked)

    **Key Arguments**

    - ``dbConn`` -- mysql database connection
    - ``log`` -- logger
    - ``settings`` -- settings for the marshall.

    ```python
    from marshallEngine.feeders.atlas.lightcurve import generate_atlas_lightcurves
    generate_atlas_lightcurves(
        log=log,
        dbConn=dbConn,
        settings=settings
    )
    ```
    """
    log.debug('starting the ``generate_atlas_lightcurves`` function')

    # SELECT SOURCES THAT NEED THEIR ATLAS FP LIGHTCURVES CREATED/UPDATED
    sqlQuery = u"""
        SELECT
                t.transientBucketId
            FROM
                transientBucket t ,pesstoObjects p
            WHERE
                p.transientBucketId=t.transientBucketId
                and t.survey = 'ATLAS FP' and t.limitingMag = 0
                and ((p.atlas_fp_lightcurve < t.dateCreated and p.atlas_fp_lightcurve != 0) or p.atlas_fp_lightcurve is null)
            GROUP BY t.transientBucketId;
    """
    rows = readquery(log=log, sqlQuery=sqlQuery, dbConn=dbConn)
    transientIds = [r["transientBucketId"] for r in rows]

    total = len(transientIds)
    if total > 1000:
        print(
            "ATLAS lightcurves need to be generated for %(total)s sources - generating the next 1000"
            % locals())
        transientIds = transientIds[:1000]
        total = len(transientIds)
    else:
        print("Generating ATLAS lightcurves for %(total)s sources" % locals())

    # SETUP THE INITIAL FIGURE FOR THE PLOT (ONLY ONCE)
    fig = plt.figure(num=None,
                     figsize=(10, 10),
                     dpi=100,
                     facecolor=None,
                     edgecolor=None,
                     frameon=True)
    mpl.rc('ytick', labelsize=18)
    mpl.rc('xtick', labelsize=18)
    mpl.rcParams.update({'font.size': 22})

    # FORMAT THE AXES
    ax = fig.add_axes([0.1, 0.1, 0.8, 0.8], polar=False, frameon=True)
    ax.set_xlabel('MJD', labelpad=20)
    ax.set_yticks([2.2])

    # RHS AXIS TICKS
    plt.setp(ax.xaxis.get_majorticklabels(),
             rotation=45,
             horizontalalignment='right')
    ax.xaxis.set_major_formatter(mtick.FormatStrFormatter('%5.0f'))

    y_formatter = mpl.ticker.FormatStrFormatter("%2.1f")
    ax.yaxis.set_major_formatter(y_formatter)
    ax.xaxis.grid(False)

    # ADD SECOND Y-AXIS
    ax2 = ax.twinx()
    ax2.yaxis.set_major_formatter(y_formatter)
    ax2.set_ylabel(r'Flux ($\mu$Jy)', rotation=-90., labelpad=27)
    ax2.grid(False)

    # ADD SECOND X-AXIS
    ax3 = ax.twiny()
    ax3.grid(True)
    plt.setp(ax3.xaxis.get_majorticklabels(),
             rotation=45,
             horizontalalignment='left')

    # CONVERTER TO CONVERT MJD TO DATE
    converter = conversions(log=log)

    if len(transientIds) < 3:
        plotPaths = []
        for transientBucketId in transientIds:
            plotPaths.append(
                plot_single_result(log=log,
                                   transientBucketId=transientBucketId,
                                   fig=fig,
                                   converter=converter,
                                   ax=ax,
                                   settings=settings))
    else:
        log.info("""starting multiprocessing""")
        plotPaths = fmultiprocess(log=log,
                                  function=plot_single_result,
                                  inputArray=transientIds,
                                  poolSize=False,
                                  timeout=7200,
                                  fig=fig,
                                  converter=converter,
                                  ax=ax,
                                  settings=settings)
        log.info("""finished multiprocessing""")

    # REMOVE MISSING PLOTS
    transientIdGood = [t for p, t in zip(plotPaths, transientIds) if p]
    transientIdBad = [t for p, t in zip(plotPaths, transientIds) if p is None]

    # UPDATE THE atlas_fp_lightcurve DATE FOR TRANSIENTS WE HAVE JUST
    # GENERATED PLOTS FOR
    if len(transientIdGood):
        transientIdGood = (",").join([str(t) for t in transientIdGood])
        sqlQuery = f"""update pesstoObjects set atlas_fp_lightcurve = NOW() where transientBucketID in ({transientIdGood})"""
        writequery(log=log, sqlQuery=sqlQuery, dbConn=dbConn)

    # RESET THE atlas_fp_lightcurve FLAG FOR TRANSIENTS WHOSE PLOTS FAILED TO
    # GENERATE
    if len(transientIdBad):
        transientIdBad = (",").join([str(t) for t in transientIdBad])
        sqlQuery = f"""update pesstoObjects set atlas_fp_lightcurve = 0 where transientBucketID in ({transientIdBad})"""
        writequery(log=log, sqlQuery=sqlQuery, dbConn=dbConn)

    log.debug('completed the ``generate_atlas_lightcurves`` function')
    return None
Example #12
    def plot(self):
        """*generate a batch of lightcurves using multiprocessing given their transientBucketIds*

        **Return**

        - ``filepath`` -- path to the last generated plot file


        **Usage**

        ```python
        from marshallEngine.lightcurves import marshall_lightcurves
        lc = marshall_lightcurves(
            log=log,
            dbConn=dbConn,
            settings=settings,
            transientBucketIds=[28421489, 28121353, 4637952, 27409808]
        )
        lc.plot()
        ```

        """
        self.log.debug('starting the ``plot`` method')

        # DEFINE AN INPUT ARRAY
        total = len(self.transientBucketIds)

        thisDict = {"database settings": self.settings["database settings"]}

        if total:
            print("updating lightcurves for %(total)s transients" % locals())
            print()

        # USE IF ISSUES IN _plot_one FUNCTION
        # for transientBucketId in self.transientBucketIds:
        #     _plot_one(
        #         transientBucketId=transientBucketId,
        #         log=self.log,
        #         settings=self.settings
        #     )

        results = fmultiprocess(log=self.log,
                                function=_plot_one,
                                inputArray=self.transientBucketIds,
                                poolSize=False,
                                timeout=3600,
                                settings=self.settings)

        sqlQuery = ""
        updatedTransientBucketIds = []
        for t, r in zip(self.transientBucketIds, results):
            if not r[0]:
                # LIGHTCURVE NOT GENERATED
                continue
            updatedTransientBucketIds.append(t)
            filepath = r[0]
            currentMagnitude = r[1]
            gradient = r[2]
            sqlQuery += """update transientBucketSummaries set currentMagnitudeEstimate = %(currentMagnitude)s, currentMagnitudeEstimateUpdated = NOW(), recentSlopeOfLightcurve = %(gradient)s where transientBucketId = %(t)s;
            """ % locals()
        ids = []
        ids[:] = [str(i) for i in updatedTransientBucketIds]
        updatedTransientBucketIds = (",").join(ids)
        sqlQuery += "update pesstoObjects set master_pessto_lightcurve = 1 where transientBucketId in (%(updatedTransientBucketIds)s);" % locals(
        )

        if len(updatedTransientBucketIds):
            writequery(
                log=self.log,
                sqlQuery=sqlQuery,
                dbConn=self.dbConn,
            )
        else:
            filepath = False

        self.log.debug('completed the ``plot`` method')

        return filepath
Example #13
    def _clean_up_columns(self):
        """clean up columns

        .. todo ::

            - update key arguments values and definitions with defaults
            - update return values and definitions
            - update usage examples and text
            - update docstring text
            - check sublime snippet exists
            - clip any useful text to docs mindmap
            - regenerate the docs and check rendering of this docstring
        """
        self.log.debug('starting the ``_clean_up_columns`` method')

        sqlQueries = [
            "update tcs_helper_catalogue_tables_info set old_table_name = table_name where old_table_name is null;",
            "update tcs_helper_catalogue_tables_info set version_number = 'stream' where table_name like '%%stream' and version_number is null;",
            """update tcs_helper_catalogue_tables_info set in_ned = 0 where table_name like '%%stream' and in_ned is null;""",
            """update tcs_helper_catalogue_tables_info set vizier_link = 0 where table_name like '%%stream' and vizier_link is null;""",
            "update tcs_helper_catalogue_views_info set old_view_name = view_name where old_view_name is null;",
        ]

        for sqlQuery in sqlQueries:
            writequery(
                log=self.log,
                sqlQuery=sqlQuery,
                dbConn=self.cataloguesDbConn,
            )

        # VIEW OBJECT TYPES
        sqlQuery = u"""
            SELECT view_name FROM tcs_helper_catalogue_views_info where legacy_view = 0 and object_type is null;
        """ % locals()
        rows = readquery(log=self.log,
                         sqlQuery=sqlQuery,
                         dbConn=self.cataloguesDbConn,
                         quiet=False)

        for row in rows:
            view_name = row["view_name"]
            object_type = view_name.replace("tcs_view_", "").split("_")[0]

            sqlQuery = u"""
                update tcs_helper_catalogue_views_info set object_type = "%(object_type)s" where view_name = "%(view_name)s"
            """ % locals()
            writequery(
                log=self.log,
                sqlQuery=sqlQuery,
                dbConn=self.cataloguesDbConn,
            )

        # MASTER TABLE ID FOR VIEWS
        sqlQuery = u"""
            SELECT view_name FROM tcs_helper_catalogue_views_info where legacy_view = 0 and table_id is null;
        """ % locals()
        rows = readquery(log=self.log,
                         sqlQuery=sqlQuery,
                         dbConn=self.cataloguesDbConn,
                         quiet=False)

        for row in rows:
            view_name = row["view_name"]
            table_name = view_name.replace("tcs_view_", "").split("_")[1:]
            table_name = ("_").join(table_name)
            table_name = "tcs_cat_%(table_name)s" % locals()

            sqlQuery = u"""
                update tcs_helper_catalogue_views_info set table_id = (select id from tcs_helper_catalogue_tables_info where table_name = "%(table_name)s") where view_name = "%(view_name)s"
            """ % locals()
            writequery(
                log=self.log,
                sqlQuery=sqlQuery,
                dbConn=self.cataloguesDbConn,
            )

        self.log.debug('completed the ``_clean_up_columns`` method')
        return None
Example #14
def add_htm_ids_to_mysql_database_table(raColName,
                                        declColName,
                                        tableName,
                                        dbConn,
                                        log,
                                        primaryIdColumnName="primaryId",
                                        cartesian=False,
                                        batchSize=50000,
                                        reindex=False,
                                        dbSettings=False):
    """*Given a database connection, the name of a table and the column names for RA and DEC, generate IDs for one or more HTM levels in the table*

    **Key Arguments**

    - ``raColName`` -- name of the column containing the RA values (decimal degrees)
    - ``declColName`` -- name of the column containing the DEC values (decimal degrees)
    - ``tableName`` -- name of table to add htmid info to
    - ``dbConn`` -- database hosting the above table
    - ``log`` -- logger
    - ``primaryIdColumnName`` -- the primary id for the table
    - ``cartesian`` -- add cartesian columns. Default *False*
    - ``batchSize`` -- the size of the batches of rows to add HTMIds to concurrently. Default *50000*
    - ``reindex`` -- reindex the entire table
    - ``dbSettings`` -- yaml settings for database


    **Return**

    - None


    **Usage**

    ```python
    from HMpTy.mysql import add_htm_ids_to_mysql_database_table
    add_htm_ids_to_mysql_database_table(
        raColName="raDeg",
        declColName="decDeg",
        tableName="my_big_star_table",
        dbConn=dbConn,
        log=log,
        primaryIdColumnName="primaryId",
        reindex=False
    )
    ```

    """
    log.debug('starting the ``add_htm_ids_to_mysql_database_table`` function')

    # TEST TABLE EXIST
    sqlQuery = """show tables"""
    rows = readquery(log=log, sqlQuery=sqlQuery, dbConn=dbConn)

    log.debug("""Checking the table %(tableName)s exists in the database""" %
              locals())
    tableList = []
    for row in rows:
        tableList.append(list(row.values())[0].lower())
    if tableName.lower() not in tableList:
        message = "The %s table does not exist in the database" % (tableName, )
        log.critical(message)
        raise IOError(message)

    log.debug(
        """Checking the RA and DEC columns exist in the %(tableName)s table"""
        % locals())
    # TEST COLUMNS EXISTS
    cursor = dbConn.cursor(ms.cursors.DictCursor)
    sqlQuery = """SELECT * FROM %s LIMIT 1""" % (tableName, )
    cursor.execute(sqlQuery)
    rows = cursor.fetchall()
    desc = cursor.description
    existingColumns = []
    for i in range(len(desc)):
        existingColumns.append(desc[i][0])
    if (raColName not in existingColumns) or (declColName
                                              not in existingColumns):
        message = 'Please make sure you have the names of the RA and DEC columns correct'
        log.critical(message)
        raise IOError(message)

    if cartesian:
        # ACTION(S) ##
        htmCols = {
            'htm16ID': 'BIGINT(20)',
            'htm13ID': 'INT',
            'htm10ID': 'INT',
            'cx': 'DOUBLE',
            'cy': 'DOUBLE',
            'cz': 'DOUBLE'
        }
    else:
        htmCols = {'htm16ID': 'BIGINT(20)', 'htm13ID': 'INT', 'htm10ID': 'INT'}

    # CHECK IF THE HTM COLUMNS EXIST YET - IF NOT, CREATE THEM
    for key in list(htmCols.keys()):
        try:
            log.debug(
                'attempting to check and generate the HTMId columns for the %s db table'
                % (tableName, ))
            colExists = \
                """SELECT *
                    FROM information_schema.COLUMNS
                    WHERE TABLE_SCHEMA=DATABASE()
                    AND COLUMN_NAME='%s'
                    AND TABLE_NAME='%s'""" \
                % (key, tableName)
            colExists = readquery(log=log, sqlQuery=colExists, dbConn=dbConn)
            switch = 0
            if not colExists:
                if switch == 0:
                    print("Adding the HTMCircle columns to %(tableName)s" %
                          locals())
                    switch = 1
                sqlQuery = 'ALTER TABLE ' + tableName + ' ADD ' + \
                    key + ' ' + htmCols[key] + ' DEFAULT NULL'
                writequery(
                    log=log,
                    sqlQuery=sqlQuery,
                    dbConn=dbConn,
                )
        except Exception as e:
            log.critical(
                'could not check and generate the HTMId columns for the %s db table - failed with this error: %s '
                % (tableName, str(e)))
            raise e

    log.debug(
        """Counting the number of rows still requiring HTMID information""" %
        locals())
    if reindex:
        sqlQuery = u"""
            SELECT INDEX_NAME FROM INFORMATION_SCHEMA.STATISTICS
                WHERE table_schema=DATABASE() AND table_name='%(tableName)s' and COLUMN_NAME = "%(primaryIdColumnName)s";
        """ % locals()
        keyname = readquery(log=log, sqlQuery=sqlQuery,
                            dbConn=dbConn)[0]["INDEX_NAME"]
        if keyname != "PRIMARY":
            log.error(
                'To reindex the entire table the primaryID you submit must be unique. "%(primaryIdColumnName)s" is not unique in table "%(tableName)s"'
                % locals())
            return

        sqlQuery = """ALTER TABLE `%(tableName)s` disable keys""" % locals()
        writequery(log=log, sqlQuery=sqlQuery, dbConn=dbConn)

        sqlQuery = """SELECT count(*) as count from `%(tableName)s`""" % locals(
        )
    elif cartesian:
        # COUNT ROWS WHERE HTMIDs ARE NOT SET
        sqlQuery = """SELECT count(*) as count from `%(tableName)s` where htm10ID is NULL or cx is null and %(raColName)s is not null""" % locals(
        )
    else:
        # COUNT ROWS WHERE HTMIDs ARE NOT SET
        sqlQuery = """SELECT count(*) as count from `%(tableName)s` where htm10ID is NULL and %(raColName)s is not null""" % locals(
        )
    log.debug("""SQLQUERY:\n\n%(sqlQuery)s\n\n""" % locals())
    rowCount = readquery(log=log,
                         sqlQuery=sqlQuery,
                         dbConn=dbConn,
                         quiet=False)
    totalCount = rowCount[0]["count"]

    # ADD HTMIDs IN BATCHES
    total = totalCount
    batches = int(old_div(total, batchSize))

    count = 0
    lastId = False
    # NOW GENERATE THE HTMIds FOR THESE ROWS
    for i in range(batches + 1):
        if total == 0:
            continue
        count += batchSize
        if count > batchSize:
            # Cursor up one line and clear line
            sys.stdout.write("\x1b[1A\x1b[2K")
        if count > totalCount:
            count = totalCount

        start = time.time()

        log.debug(
            """Selecting the next %(batchSize)s rows requiring HTMID information in the %(tableName)s table"""
            % locals())
        if reindex:
            # SELECT THE ROWS WHERE THE HTMIds ARE NOT SET
            if lastId:
                sqlQuery = """SELECT `%s`, `%s`, `%s` from `%s` where `%s` > '%s' order by `%s` limit %s""" % (
                    primaryIdColumnName, raColName, declColName, tableName,
                    primaryIdColumnName, lastId, primaryIdColumnName,
                    batchSize)
            else:
                sqlQuery = """SELECT `%s`, `%s`, `%s` from `%s` order by `%s` limit %s""" % (
                    primaryIdColumnName, raColName, declColName, tableName,
                    primaryIdColumnName, batchSize)
        elif cartesian:
            # SELECT THE ROWS WHERE THE HTMIds ARE NOT SET
            sqlQuery = """SELECT `%s`, `%s`, `%s` from `%s` where `%s` is not null and `%s` >= 0 and ((htm10ID is NULL or cx is null)) limit %s""" % (
                primaryIdColumnName, raColName, declColName, tableName,
                raColName, raColName, batchSize)
        else:
            # SELECT THE ROWS WHERE THE HTMIds ARE NOT SET
            sqlQuery = """SELECT `%s`, `%s`, `%s` from `%s` where `%s` is not null and `%s` >= 0 and htm10ID is NULL limit %s""" % (
                primaryIdColumnName, raColName, declColName, tableName,
                raColName, raColName, batchSize)
        batch = readquery(log=log, sqlQuery=sqlQuery, dbConn=dbConn)
        if reindex and len(batch):
            lastId = batch[-1][primaryIdColumnName]
        log.debug(
            """The next %(batchSize)s rows requiring HTMID information have now been selected"""
            % locals())

        raList = []
        decList = []
        pIdList = []
        raList[:] = [r[raColName] for r in batch]
        decList[:] = [r[declColName] for r in batch]
        pIdList[:] = [r[primaryIdColumnName] for r in batch]

        from HMpTy import htm
        mesh16 = htm.HTM(16)
        mesh13 = htm.HTM(13)
        mesh10 = htm.HTM(10)

        log.debug('calculating htmIds for batch of %s rows in %s db table' % (
            batchSize,
            tableName,
        ))
        htm16Ids = mesh16.lookup_id(raList, decList)
        htm13Ids = mesh13.lookup_id(raList, decList)
        htm10Ids = mesh10.lookup_id(raList, decList)
        log.debug(
            'finished calculating htmIds for batch of %s rows in %s db table' %
            (
                batchSize,
                tableName,
            ))

        if cartesian:
            log.debug(
                'calculating cartesian coordinates for batch of %s rows in %s db table'
                % (
                    batchSize,
                    tableName,
                ))
            cx = []
            cy = []
            cz = []
            for r, d in zip(raList, decList):
                r = math.radians(r)
                d = math.radians(d)
                cos_dec = math.cos(d)
                cx.append(math.cos(r) * cos_dec)
                cy.append(math.sin(r) * cos_dec)
                cz.append(math.sin(d))

            updates = []
            updates[:] = [{
                "htm16ID": int(h16),
                "htm13ID": int(h13),
                "htm10ID": int(h10),
                primaryIdColumnName: pid,
                "cx": float(ccx),
                "cy": float(ccy),
                "cz": float(ccz)
            } for h16, h13, h10, pid, ccx, ccy, ccz in zip(
                htm16Ids, htm13Ids, htm10Ids, pIdList, cx, cy, cz)]

            log.debug(
                'finished calculating cartesian coordinates for batch of %s rows in %s db table'
                % (
                    batchSize,
                    tableName,
                ))
        else:
            log.debug('building the sqlquery')
            updates = []
            # updates[:] = ["UPDATE `%(tableName)s` SET htm16ID=%(h16)s, htm13ID=%(h13)s, htm10ID=%(h10)s where %(primaryIdColumnName)s = '%(pid)s';" % locals() for h16,
            # h13, h10, pid in zip(htm16Ids, htm13Ids, htm10Ids, pIdList)]
            updates[:] = [{
                "htm16ID": int(h16),
                "htm13ID": int(h13),
                "htm10ID": int(h10),
                primaryIdColumnName: pid
            }
                          for h16, h13, h10, pid in zip(
                              htm16Ids, htm13Ids, htm10Ids, pIdList)]
            log.debug('finished building the sqlquery')

        if len(updates):
            log.debug(
                'starting to update the HTMIds for new objects in the %s db table'
                % (tableName, ))

            # USE dbSettings & dbConn TO ACTIVATE MULTIPROCESSING
            insert_list_of_dictionaries_into_database_tables(
                dbConn=dbConn,
                log=log,
                dictList=updates,
                dbTableName=tableName,
                uniqueKeyList=[],
                dateModified=False,
                batchSize=20000,
                replace=True,
                dbSettings=dbSettings,
                dateCreated=False)

            # writequery(
            #     log=log,
            #     sqlQuery=sqlQuery,
            #     dbConn=dbConn,
            # )
            log.debug(
                'finished updating the HTMIds for new objects in the %s db table'
                % (tableName, ))
        else:
            log.debug('no HTMIds to add to the %s db table' % (tableName, ))

        percent = float(count) * 100. / float(totalCount)
        print(
            "%(count)s / %(totalCount)s htmIds added to %(tableName)s (%(percent)0.5f%% complete)"
            % locals())
        end = time.time()
        timediff = end - start
        timediff = timediff * 1000000. / float(batchSize)
        print("Update speed: %(timediff)0.2fs/1e6 rows\n" % locals())

    # APPLY INDEXES IF NEEDED
    sqlQuery = ""
    for index in ["htm10ID", "htm13ID", "htm16ID"]:
        log.debug('adding %(index)s index to %(tableName)s' % locals())
        iname = "idx_" + index
        asqlQuery = u"""
            SELECT COUNT(1) IndexIsThere FROM INFORMATION_SCHEMA.STATISTICS
                WHERE table_schema=DATABASE() AND table_name='%(tableName)s' AND index_name='%(iname)s';
        """ % locals()
        count = readquery(log=log, sqlQuery=asqlQuery,
                          dbConn=dbConn)[0]["IndexIsThere"]

        if count == 0:
            if not len(sqlQuery):
                sqlQuery += u"""
                    ALTER TABLE %(tableName)s ADD INDEX `%(iname)s` (`%(index)s` ASC)
                """ % locals()
            else:
                sqlQuery += u""", ADD INDEX `%(iname)s` (`%(index)s` ASC)""" % locals(
                )
    if len(sqlQuery):
        writequery(
            log=log,
            sqlQuery=sqlQuery + ";",
            dbConn=dbConn,
        )
    log.debug('finished adding indexes to %(tableName)s' % locals())

    if reindex:
        print("Re-enabling keys within the '%(tableName)s' table" % locals())
        sqlQuery = """ALTER TABLE `%(tableName)s` enable keys""" % locals()
        writequery(log=log, sqlQuery=sqlQuery, dbConn=dbConn)

    print("All HTMIds added to %(tableName)s" % locals())

    log.debug('completed the ``add_htm_ids_to_mysql_database_table`` function')
    return None
Example #15
    def get(self, days):
        """
        *download a cache of ATLAS nights data*

        **Key Arguments:**
            - ``days`` -- the number of days data to cache locally

        **Return:**
            - None

        **Usage:**

            See class docstring
        """
        self.log.info('starting the ``get`` method')

        self._remove_processed_data()

        archivePath = self.settings["atlas archive path"]
        self._update_day_tracker_table()
        mjds = self._determine_mjds_to_download(days=days)

        if len(mjds) == 0:
            return

        dbConn = self.atlasMoversDBConn

        # DOWNLOAD THE DATA IN PARALLEL
        results = fmultiprocess(log=self.log,
                                function=_download_one_night_of_atlas_data,
                                timeout=3600,
                                inputArray=mjds,
                                archivePath=archivePath)

        global dbSettings

        dbSettings = self.settings["database settings"]["atlasMovers"]

        for d in results:
            if d and len(d[0]):
                insert_list_of_dictionaries_into_database_tables(
                    dbConn=dbConn,
                    log=self.log,
                    dictList=d[0],
                    dbTableName="atlas_exposures",
                    dateModified=True,
                    batchSize=10000,
                    replace=True,
                    dbSettings=dbSettings)

        # UPDATE BOOKKEEPING
        mjds = []
        mjds[:] = [r[1] for r in results if (r and r[1] is not None)]
        mjds = (',').join(mjds)

        if len(mjds):
            sqlQuery = """update atlas_exposures set local_data = 1 where floor(mjd) in (%(mjds)s);
        update day_tracker set processed = 1 where mjd in (%(mjds)s);""" % locals(
            )
            writequery(
                log=self.log,
                sqlQuery=sqlQuery,
                dbConn=self.atlasMoversDBConn,
            )

        bk = bookkeeper(log=self.log, settings=self.settings, fullUpdate=False)
        bk.clean_all()

        self.log.info('completed the ``get`` method')
        return None
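
# A hedged usage sketch for the ``get`` method above. Assumptions (not
# confirmed by this snippet alone): the enclosing worker class is the
# ``download`` class described in a later example, constructed with ``log``
# and ``settings``; the 7-day cache window is illustrative.
cacher = download(log=log, settings=settings)
cacher.get(days=7)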
Example #16
    def _updated_row_counts_in_tcs_helper_catalogue_tables_info(self):
        """ updated row counts in tcs catalogue tables

        .. todo ::

            - update key arguments values and definitions with defaults
            - update return values and definitions
            - update usage examples and text
            - update docstring text
            - check sublime snippet exists
            - clip any useful text to docs mindmap
            - regenerate the docs and check rendering of this docstring
        """
        self.log.debug(
            'starting the ``_updated_row_counts_in_tcs_helper_catalogue_tables_info`` method'
        )

        sqlQuery = u"""
            select * from tcs_helper_catalogue_tables_info where table_name like "%%stream" or (number_of_rows is null and legacy_table = 0)
        """ % locals()
        rows = readquery(log=self.log,
                         sqlQuery=sqlQuery,
                         dbConn=self.cataloguesDbConn,
                         quiet=False)

        for row in rows:
            tbName = row["table_name"]

            sqlQuery = u"""
                update tcs_helper_catalogue_tables_info set number_of_rows = (select count(*) as count from %(tbName)s) where table_name = "%(tbName)s"
            """ % locals()
            writequery(
                log=self.log,
                sqlQuery=sqlQuery,
                dbConn=self.cataloguesDbConn,
            )

        sqlQuery = u"""
            select * from tcs_helper_catalogue_views_info where (number_of_rows is null and legacy_view = 0)
        """ % locals()
        rows = readquery(log=self.log,
                         sqlQuery=sqlQuery,
                         dbConn=self.cataloguesDbConn,
                         quiet=False)

        for row in rows:
            tbName = row["view_name"]
            print(tbName)

            sqlQuery = u"""
                update tcs_helper_catalogue_views_info set number_of_rows = (select count(*) as count from %(tbName)s) where view_name = "%(tbName)s"
            """ % locals()
            writequery(
                log=self.log,
                sqlQuery=sqlQuery,
                dbConn=self.cataloguesDbConn,
            )

        self.log.debug(
            'completed the ``_updated_row_counts_in_tcs_helper_catalogue_tables_info`` method'
        )
        return None
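The method above is a readquery-then-writequery refresh: select the helper rows whose `number_of_rows` is stale, then issue one UPDATE per table using a `count(*)` subquery. A condensed sketch of the same pattern, assuming `log` and `dbConn` exist and using a hypothetical helper table `my_info_table` with `table_name` and `number_of_rows` columns:

```python
# Sketch only: `my_info_table` is a hypothetical helper table; `log` and
# `dbConn` are assumed to exist already.
from fundamentals.mysql import readquery, writequery

# FIND THE TABLES WHOSE ROW COUNTS NEED REFRESHING
rows = readquery(
    log=log,
    sqlQuery="select table_name from my_info_table where number_of_rows is null",
    dbConn=dbConn,
    quiet=False)

# REFRESH EACH COUNT WITH A SINGLE UPDATE PER TABLE
for row in rows:
    tbName = row["table_name"]
    sqlQuery = u"""
        update my_info_table set number_of_rows = (select count(*) from %(tbName)s) where table_name = "%(tbName)s"
    """ % locals()
    writequery(log=log, sqlQuery=sqlQuery, dbConn=dbConn)
```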
Example #17
            raise IOError(message)

        fitsDict = {}
        for l in thisData.split("\n"):
            kw = l.split("=")[0].strip()
            if kw in fitskw.keys() and kw not in fitsDict.keys():
                fitsDict[fitskw[kw]] = l.split("=")[1].split(
                    "/")[0].strip().replace("'", "")

        if len(fitsDict) == 7:
            allData.append(fitsDict)

    sqlQuery = """
        update atlas_exposures set dev_flag = 1 where dev_flag = 0 and floor(mjd) in (select mjd from day_tracker where dev_flag = 1);
    """
    writequery(log=log, sqlQuery=sqlQuery, dbConn=atlasMoversDBConn)

    return (allData, str(int(mjd)))


class download():
    """
    *The worker class for the download module*

    **Key Arguments:**
        - ``log`` -- logger
        - ``settings`` -- the settings dictionary

    **Usage:**

        To set up your logger, settings and database connections, please use the ``fundamentals`` package (`see tutorial here <http://fundamentals.readthedocs.io/en/latest/#tutorial>`_).
Example #18
    def _create_tcs_help_tables(
            self):
        """* create tcs help tables*

        **Key Arguments**

        # -
        

        **Return**

        - None
        

        **Usage**

        

        ```python
        usage code 
        ```

        ---

        ```eval_rst
        .. todo::

            - add usage info
            - create a sublime snippet for usage
            - write a command-line tool for this method
            - update package tutorial with command-line tool info if needed
        ```
        """
        self.log.debug('starting the ``_create_tcs_help_tables`` method')

        sqlQuery = """
        CREATE TABLE IF NOT EXISTS `tcs_helper_catalogue_tables_info` (
          `id` smallint(5) unsigned NOT NULL,
          `table_name` varchar(100) COLLATE utf8_unicode_ci DEFAULT NULL,
          `description` varchar(60) COLLATE utf8_unicode_ci DEFAULT NULL,
          `url` varchar(255) COLLATE utf8_unicode_ci DEFAULT NULL,
          `number_of_rows` bigint(20) DEFAULT NULL,
          `reference_url` varchar(200) COLLATE utf8_unicode_ci DEFAULT NULL,
          `reference_text` varchar(70) COLLATE utf8_unicode_ci DEFAULT NULL,
          `notes` text COLLATE utf8_unicode_ci,
          `vizier_link` varchar(200) COLLATE utf8_unicode_ci DEFAULT NULL,
          `in_ned` tinyint(4) DEFAULT NULL,
          `object_types` varchar(100) COLLATE utf8_unicode_ci DEFAULT NULL,
          `version_number` varchar(45) COLLATE utf8_unicode_ci DEFAULT NULL,
          `last_updated` datetime DEFAULT NULL,
          `legacy_table` tinyint(4) DEFAULT '0',
          `old_table_name` varchar(100) COLLATE utf8_unicode_ci DEFAULT NULL,
          `raColName` varchar(45) COLLATE utf8_unicode_ci DEFAULT NULL,
          `decColName` varchar(45) COLLATE utf8_unicode_ci DEFAULT NULL,
          `catalogue_object_subtypeColName` varchar(45) COLLATE utf8_unicode_ci DEFAULT NULL,
          `catalogue_object_idColName` varchar(100) COLLATE utf8_unicode_ci DEFAULT NULL,
          `zColName` varchar(100) COLLATE utf8_unicode_ci DEFAULT NULL,
          `distanceColName` varchar(45) COLLATE utf8_unicode_ci DEFAULT NULL,
          `object_type_accuracy` tinyint(2) DEFAULT NULL,
          `semiMajorColName` varchar(100) COLLATE utf8_unicode_ci DEFAULT NULL,
          `semiMajorToArcsec` float DEFAULT NULL,
          `transientStream` tinyint(4) DEFAULT '0',
          `photoZColName` varchar(45) COLLATE utf8_unicode_ci DEFAULT NULL,
          `photoZErrColName` varchar(45) COLLATE utf8_unicode_ci DEFAULT NULL,
          `UColName` varchar(45) COLLATE utf8_unicode_ci DEFAULT NULL,
          `UErrColName` varchar(45) COLLATE utf8_unicode_ci DEFAULT NULL,
          `BColName` varchar(45) COLLATE utf8_unicode_ci DEFAULT NULL,
          `BErrColName` varchar(45) COLLATE utf8_unicode_ci DEFAULT NULL,
          `VColName` varchar(45) COLLATE utf8_unicode_ci DEFAULT NULL,
          `VErrColName` varchar(45) COLLATE utf8_unicode_ci DEFAULT NULL,
          `RColName` varchar(45) COLLATE utf8_unicode_ci DEFAULT NULL,
          `RErrColName` varchar(45) COLLATE utf8_unicode_ci DEFAULT NULL,
          `IColName` varchar(45) COLLATE utf8_unicode_ci DEFAULT NULL,
          `IErrColName` varchar(45) COLLATE utf8_unicode_ci DEFAULT NULL,
          `JColName` varchar(45) COLLATE utf8_unicode_ci DEFAULT NULL,
          `JErrColName` varchar(45) COLLATE utf8_unicode_ci DEFAULT NULL,
          `HColName` varchar(45) COLLATE utf8_unicode_ci DEFAULT NULL,
          `HErrColName` varchar(45) COLLATE utf8_unicode_ci DEFAULT NULL,
          `KColName` varchar(45) COLLATE utf8_unicode_ci DEFAULT NULL,
          `KErrColName` varchar(45) COLLATE utf8_unicode_ci DEFAULT NULL,
          `_uColName` varchar(45) COLLATE utf8_unicode_ci DEFAULT NULL,
          `_uErrColName` varchar(45) COLLATE utf8_unicode_ci DEFAULT NULL,
          `_gColName` varchar(45) COLLATE utf8_unicode_ci DEFAULT NULL,
          `_gErrColName` varchar(45) COLLATE utf8_unicode_ci DEFAULT NULL,
          `_rColName` varchar(45) COLLATE utf8_unicode_ci DEFAULT NULL,
          `_rErrColName` varchar(45) COLLATE utf8_unicode_ci DEFAULT NULL,
          `_iColName` varchar(45) COLLATE utf8_unicode_ci DEFAULT NULL,
          `_iErrColName` varchar(45) COLLATE utf8_unicode_ci DEFAULT NULL,
          `_zColName` varchar(45) COLLATE utf8_unicode_ci DEFAULT NULL,
          `_zErrColName` varchar(45) COLLATE utf8_unicode_ci DEFAULT NULL,
          `_yColName` varchar(45) COLLATE utf8_unicode_ci DEFAULT NULL,
          `_yErrColName` varchar(45) COLLATE utf8_unicode_ci DEFAULT NULL,
          `unkMagColName` varchar(45) COLLATE utf8_unicode_ci DEFAULT NULL,
          `unkMagErrColName` varchar(45) COLLATE utf8_unicode_ci DEFAULT NULL,
          `GColName` varchar(45) COLLATE utf8_unicode_ci DEFAULT NULL,
          `GErrColName` varchar(45) COLLATE utf8_unicode_ci DEFAULT NULL,
          PRIMARY KEY (`id`)
        ) ENGINE=MyISAM DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci;       
        """

        writequery(
            log=self.log,
            sqlQuery=sqlQuery,
            dbConn=self.cataloguesDbConn
        )

        sqlQuery = """
        CREATE TABLE IF NOT EXISTS `tcs_helper_catalogue_views_info` (
              `id` smallint(5) unsigned NOT NULL AUTO_INCREMENT,
              `view_name` varchar(100) COLLATE utf8_unicode_ci DEFAULT NULL,
              `number_of_rows` bigint(20) DEFAULT NULL,
              `object_type` varchar(100) COLLATE utf8_unicode_ci DEFAULT NULL,
              `legacy_view` tinyint(4) DEFAULT '0',
              `old_view_name` varchar(100) COLLATE utf8_unicode_ci DEFAULT NULL,
              `table_id` int(11) DEFAULT NULL,
              PRIMARY KEY (`id`)
            ) ENGINE=MyISAM AUTO_INCREMENT=50 DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci;
        """

        writequery(
            log=self.log,
            sqlQuery=sqlQuery,
            dbConn=self.cataloguesDbConn
        )

        self.log.debug('completed the ``_create_tcs_help_tables`` method')
        return None
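Both CREATE statements above are written with `IF NOT EXISTS`, so the method can be re-run safely against an existing schema. A minimal sketch of that idempotent setup pattern, assuming `log` and `dbConn` exist and using a hypothetical helper table:

```python
# Sketch only: `my_helper_table` is hypothetical; `log` and `dbConn` are
# assumed to exist. IF NOT EXISTS makes the call safe to repeat.
from fundamentals.mysql import writequery

sqlQuery = """
CREATE TABLE IF NOT EXISTS `my_helper_table` (
    `id` INT UNSIGNED NOT NULL AUTO_INCREMENT,
    `name` VARCHAR(100) DEFAULT NULL,
    PRIMARY KEY (`id`)
) ENGINE=MyISAM DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci;
"""
writequery(log=log, sqlQuery=sqlQuery, dbConn=dbConn)
```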
Example #19
    def ingest(self, withinLastDays=False):
        """*Ingest the data into the marshall feeder survey table*

        **Key Arguments**

        - ``withinLastDays`` -- note this will be handled by the transientNamer import to the database

        """
        self.log.debug('starting the ``ingest`` method')

        # UPDATE THE TNS SPECTRA TABLE WITH EXTRA INFOS
        from fundamentals.mysql import writequery
        sqlQuery = """CALL `update_tns_tables`();""" % locals()
        writequery(log=self.log, sqlQuery=sqlQuery, dbConn=self.dbConn)

        # PARSE TNS
        tns = search(log=self.log,
                     discInLastDays=withinLastDays,
                     settings=self.settings)

        lists = [tns.sources, tns.photometry, tns.files, tns.spectra]
        tableNames = [
            "tns_sources", "tns_photometry", "tns_files", "tns_spectra"
        ]

        for l, t in zip(lists, tableNames):
            # USE dbSettings TO ACTIVATE MULTIPROCESSING - INSERT LIST OF
            # DICTIONARIES INTO DATABASE
            insert_list_of_dictionaries_into_database_tables(
                dbConn=self.dbConn,
                log=self.log,
                dictList=l,
                dbTableName=t,
                dateModified=True,
                dateCreated=True,
                batchSize=2500,
                replace=True,
                dbSettings=self.settings["database settings"])

        # INSERT THE SOURCES TABLE
        self.insert_into_transientBucket()

        # NOW THE SPECTRA TABLE
        self.fsTableName = "tns_spectra"
        self.survey = "tns"
        self.insert_into_transientBucket(importUnmatched=False)

        # NOW THE PHOTOMETRY TABLE
        self.fsTableName = "tns_photometry"
        self.survey = "tns"
        self.insert_into_transientBucket(importUnmatched=False)

        # ALSO MATCH NEW ASTRONOTES
        sqlQuery = """CALL sync_marshall_feeder_survey_transientBucketId('astronotes_transients');""" % locals(
        )
        writequery(log=self.log, sqlQuery=sqlQuery, dbConn=self.dbConn)

        # CLEAN UP TASKS TO MAKE THE TICKET UPDATE
        self.clean_up()

        self.log.debug('completed the ``ingest`` method')
        return None
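The heavier housekeeping in the ingest above is delegated to MySQL stored procedures via plain `CALL` statements. A minimal sketch of that pattern, assuming `log` and `dbConn` exist and that the (hypothetical) procedures have already been created in the connected schema:

```python
# Sketch only: the procedure names are hypothetical; `log` and `dbConn` are
# assumed to exist.
from fundamentals.mysql import writequery

procedureNames = ["my_housekeeping_step_one", "my_housekeeping_step_two"]
for p in procedureNames:
    sqlQuery = "CALL `%(p)s`();" % locals()
    writequery(log=log, sqlQuery=sqlQuery, dbConn=dbConn)
```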
Example #20
    def _feeder_survey_transientbucket_crossmatch(self):
        """*crossmatch remaining unique, unmatched sources in feeder survey with sources in the transientbucket & copy matched feeder survey rows to the transientbucket*

        **Return**

        - ``unmatched`` -- a list of the unmatched (i.e. new to the marshall) feeder survey source names

        """
        self.log.debug(
            'starting the ``_feeder_survey_transientbucket_crossmatch`` method'
        )

        fsTableName = self.fsTableName

        # GET THE COLUMN MAP FOR THE FEEDER SURVEY TABLE
        sqlQuery = u"""
            SELECT * FROM marshall_fs_column_map where fs_table_name = '%(fsTableName)s' and transientBucket_column in ('name','raDeg','decDeg','limitingMag')
        """ % locals()
        rows = readquery(log=self.log,
                         sqlQuery=sqlQuery,
                         dbConn=self.dbConn,
                         quiet=False)

        columns = {}
        for row in rows:
            columns[row["transientBucket_column"]] = row["fs_table_column"]

        if "raDeg" not in columns:
            print(f"No coordinates to match in the {fsTableName} table")
            return []

        # BUILD QUERY TO GET UNIQUE UN-MATCHED SOURCES
        fs_name = columns["name"]
        self.fs_name = fs_name
        fs_ra = columns["raDeg"]
        fs_dec = columns["decDeg"]
        if 'limitingMag' in columns:
            fs_lim = columns["limitingMag"]
            limitClause = " and %(fs_lim)s = 0 " % locals()
        else:
            limitClause = ""
        sqlQuery = u"""
            select %(fs_name)s, avg(%(fs_ra)s) as %(fs_ra)s, avg(%(fs_dec)s) as %(fs_dec)s from %(fsTableName)s where ingested = 0 %(limitClause)s and %(fs_ra)s is not null and %(fs_dec)s is not null group by %(fs_name)s 
        """ % locals()

        rows = readquery(log=self.log,
                         sqlQuery=sqlQuery,
                         dbConn=self.dbConn,
                         quiet=False)

        # STOP IF THERE IS NOTHING TO MATCH
        if not len(rows):
            return []

        # SPLIT INTO BATCHES SO NOT TO OVERWHELM MEMORY
        batchSize = 200
        total = len(rows)
        batches = int(old_div(total, batchSize))
        start = 0
        end = 0
        theseBatches = []
        for i in range(batches + 1):
            end = end + batchSize
            start = i * batchSize
            thisBatch = rows[start:end]
            theseBatches.append(thisBatch)

        unmatched = []
        ticker = 0
        for batch in theseBatches:

            fs_name_list = []
            fs_ra_list = []
            fs_dec_list = []
            fs_name_list = [row[fs_name] for row in batch if row[fs_ra]]
            fs_ra_list = [row[fs_ra] for row in batch if row[fs_ra]]
            fs_dec_list = [row[fs_dec] for row in batch if row[fs_ra]]

            ticker += len(fs_name_list)
            print(
                "Matching %(ticker)s/%(total)s sources in the %(fsTableName)s against the transientBucket table"
                % locals())

            # CONESEARCH TRANSIENT BUCKET FOR PRE-KNOWN SOURCES FROM OTHER
            # SURVEYS
            from HMpTy.mysql import conesearch
            cs = conesearch(log=self.log,
                            dbConn=self.dbConn,
                            tableName="transientBucket",
                            columns="transientBucketId, name",
                            ra=fs_ra_list,
                            dec=fs_dec_list,
                            radiusArcsec=3.5,
                            separations=True,
                            distinct=True,
                            sqlWhere="masterIDFlag=1",
                            closest=True)
            matchIndies, matches = cs.search()

            # CREATE SQL QUERY TO UPDATE MATCHES IN FS TABLE WITH MATCHED
            # TRANSIENTBUCKET IDs
            updates = []
            originalList = matches.list
            originalTotal = len(originalList)

            print(
                "Adding %(originalTotal)s new %(fsTableName)s transient detections to the transientBucket table"
                % locals())
            if originalTotal:
                updates = []
                updates[:] = [
                    "update " + fsTableName + " set transientBucketId = " +
                    str(o['transientBucketId']) + " where " + fs_name +
                    " = '" + str(fs_name_list[m]) +
                    "' and transientBucketId is null;"
                    for m, o in zip(matchIndies, originalList)
                ]
                updates = ("\n").join(updates)
                writequery(log=self.log, sqlQuery=updates, dbConn=self.dbConn)

            # RETURN UNMATCHED TRANSIENTS
            for i, v in enumerate(fs_name_list):
                if i not in matchIndies:
                    unmatched.append(v)

        # COPY MATCHED ROWS TO TRANSIENTBUCKET
        self._feeder_survey_transientbucket_name_match_and_import()

        self.log.debug(
            'completed the ``_feeder_survey_transientbucket_crossmatch`` method'
        )
        return unmatched
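Rather than issuing one `writequery` call per matched source, the crossmatch above concatenates all of the UPDATE statements for a batch and executes them in a single call, which keeps database round trips to a minimum. A stripped-down sketch of that batching idea, with hypothetical table and column names and assuming `log` and `dbConn` exist:

```python
# Sketch only: `my_feeder_table` and the (name, transientBucketId) pairs are
# hypothetical; `log` and `dbConn` are assumed to exist and the connection
# must allow multi-statement queries.
from fundamentals.mysql import writequery

matches = [("source-a", 101), ("source-b", 102), ("source-c", 103)]

updates = [
    "update my_feeder_table set transientBucketId = %s "
    "where name = '%s' and transientBucketId is null;" % (tbid, name)
    for name, tbid in matches
]

# EXECUTE THE WHOLE BATCH IN ONE CALL
writequery(log=log, sqlQuery=("\n").join(updates), dbConn=dbConn)
```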
Example #21
    def _update_tcs_helper_catalogue_views_info_with_new_views(self):
        """ update tcs helper catalogue views info with new views

        .. todo ::

            - update key arguments values and definitions with defaults
            - update return values and definitions
            - update usage examples and text
            - update docstring text
            - check sublime snippet exists
            - clip any useful text to docs mindmap
            - regenerate the docs and check rendering of this docstring
        """
        self.log.debug(
            'starting the ``_update_tcs_helper_catalogue_views_info_with_new_views`` method'
        )

        sqlQuery = u"""
            SELECT max(id) as thisId FROM tcs_helper_catalogue_views_info;
        """ % locals()
        thisId = readquery(log=self.log,
                           sqlQuery=sqlQuery,
                           dbConn=self.cataloguesDbConn,
                           quiet=False)
        try:
            highestId = thisId[0]["thisId"] + 1
        except:
            highestId = 1

        sqlQuery = u"""
            SELECT TABLE_NAME FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_TYPE='VIEW' AND TABLE_SCHEMA like '%%catalogues%%' and TABLE_NAME like "tcs_view%%" and TABLE_NAME not like "%%helper%%";
        """ % locals()
        tablesInDatabase = readquery(log=self.log,
                                     sqlQuery=sqlQuery,
                                     dbConn=self.cataloguesDbConn,
                                     quiet=False)

        sqlQuery = u"""
            SELECT view_name FROM tcs_helper_catalogue_views_info;
        """ % locals()
        tableList = readquery(log=self.log,
                              sqlQuery=sqlQuery,
                              dbConn=self.cataloguesDbConn,
                              quiet=False)
        tbList = []
        for tb in tableList:
            tbList.append(tb["view_name"])

        for tb in tablesInDatabase:
            if tb["TABLE_NAME"] not in tbList:
                thisViewName = tb["TABLE_NAME"]
                print("`%(thisViewName)s` added to `tcs_helper_catalogue_views_info` database table" % locals())
                sqlQuery = u"""
                    INSERT INTO tcs_helper_catalogue_views_info (
                            id,
                            view_name
                        )
                        VALUES (
                            %(highestId)s,
                            "%(thisViewName)s"
                    )""" % locals()
                writequery(
                    log=self.log,
                    sqlQuery=sqlQuery,
                    dbConn=self.cataloguesDbConn,
                )
                highestId += 1

        self.log.debug(
            'completed the ``_update_tcs_helper_catalogue_views_info_with_new_views`` method'
        )
        return None
stream = open(
    "/Users/Dave/.config/HMpTy/HMpTy.yaml", 'r')
settings = yaml.load(stream)
stream.close()

# SETUP AND TEARDOWN FIXTURE FUNCTIONS FOR THE ENTIRE MODULE
moduleDirectory = os.path.dirname(__file__)
utKit = utKit(moduleDirectory)
log, dbConn, pathToInputDir, pathToOutputDir = utKit.setupModule()
utKit.tearDownModule()

from fundamentals.mysql import writequery
sqlQuery = """ALTER TABLE tcs_cat_ned_d_v10_2_0 DROP COLUMN htm16ID, DROP COLUMN htm10ID, DROP COLUMN htm13ID"""
try:
    writequery(
        log=log,
        sqlQuery=sqlQuery,
        dbConn=dbConn
    )
except:
    pass


class test_add_htm_ids_to_mysql_database_table():

    def test_add_htm_ids_to_mysql_database_table_function(self):

        from HMpTy.mysql import add_htm_ids_to_mysql_database_table
        add_htm_ids_to_mysql_database_table(
            raColName="raDeg",
            declColName="decDeg",
            tableName="tcs_cat_ned_d_v10_2_0",
Example #23
def _extract_phot_from_exposure(expIdIndex, log, cachePath, settings):
    """* extract phot from exposure*

    **Key Arguments:**
        - ``expIdIndex`` -- index of the exposure to extract the dophot photometry from. A tuple of expId and integer MJD
        - ``log`` -- logger
        - ``cachePath`` -- path to the cache of ATLAS data
        - ``settings`` -- the settings dictionary

    **Return:**
        - ``dophotRows`` -- the list of matched dophot rows
    """
    log.info('starting the ``_extract_phot_from_exposure`` method')

    global exposureIds

    expId = exposureIds[expIdIndex]

    # SETUP A DATABASE CONNECTION FOR THE remote database
    host = settings["database settings"]["atlasMovers"]["host"]
    user = settings["database settings"]["atlasMovers"]["user"]
    passwd = settings["database settings"]["atlasMovers"]["password"]
    dbName = settings["database settings"]["atlasMovers"]["db"]
    try:
        sshPort = settings["database settings"]["atlasMovers"]["tunnel"][
            "port"]
    except:
        sshPort = False
    thisConn = ms.connect(host=host,
                          user=user,
                          passwd=passwd,
                          db=dbName,
                          port=sshPort,
                          use_unicode=True,
                          charset='utf8',
                          client_flag=ms.constants.CLIENT.MULTI_STATEMENTS,
                          connect_timeout=3600)
    thisConn.autocommit(True)

    matchRadius = float(settings["dophot"]["search radius"])

    dophotFilePath = cachePath + "/" + \
        expId[0][:3] + "/" + str(expId[1]) + "/" + expId[0] + ".dph"

    # TEST THE FILE EXISTS
    exists = os.path.exists(dophotFilePath)
    expId = expId[0]
    if not exists:

        sqlQuery = """update atlas_exposures set dophot_match = 99 where expname = "%(expId)s" """ % locals(
        )
        writequery(
            log=log,
            sqlQuery=sqlQuery,
            dbConn=thisConn,
        )
        log.info(
            'the dophot file %(expId)s.dph is missing from the local ATLAS data cache'
            % locals())
        return []

    try:
        log.debug("attempting to open the file %s" % (dophotFilePath, ))
        dophotFile = codecs.open(dophotFilePath, encoding='utf-8', mode='r')
        dophotData = dophotFile.read()
        dophotFile.close()
    except IOError as e:
        message = 'could not open the file %s' % (dophotFilePath, )
        log.critical(message)
        raise IOError(message)
Example #24
    def get(self):
        """
        *get the panstarrs_location_stamps object*
        """
        self.log.debug('starting the ``get`` method')

        # FOR A SINGLE TRANSIENT
        if self.transientId:
            transientId = self.transientId
            sqlQuery = u"""
                select t.transientBucketId, t.raDeg,t.decDeg from pesstoObjects p, transientBucketSummaries t where p.transientBucketId = t.transientBucketId and t.transientBucketId = %(transientId)s;
            """ % locals()
        # OR THE NEXT 200 TRANSIENTS NEEDING STAMPS
        else:
            # GET NEXT 200 TRANSIENTS NEEDING PANSTARRS STAMPS
            sqlQuery = u"""
                select t.transientBucketId, t.raDeg,t.decDeg from pesstoObjects p, transientBucketSummaries t where ps1_map is null and p.transientBucketId = t.transientBucketId order by t.transientBucketId desc limit 200;
            """ % locals()
        rows = readquery(log=self.log, sqlQuery=sqlQuery, dbConn=self.dbConn)

        # FOR EACH TRANSIENT DOWNLOAD STAMP TO CACHE DIRECTORY
        downloadDirectoryPath = self.settings["downloads"][
            "transient cache directory"]

        for row in rows:
            transientBucketId = row["transientBucketId"]
            downloadPath = "%s/%s" % (downloadDirectoryPath, transientBucketId)
            ra = row["raDeg"]
            dec = row["decDeg"]

            fitsPaths, jpegPaths, colorPath = downloader(
                log=self.log,
                settings=self.settings,
                downloadDirectory=downloadPath,
                fits=False,
                jpeg=False,
                arcsecSize=60,
                filterSet='gri',
                color=True,
                singleFilters=False,
                ra=ra,
                dec=dec,
                imageType="stack"  # warp | stack
            ).get()

            # CHECK FOR FAILED IMAGES AND FLAG IN DATABASE
            if len(colorPath) == 0 or not colorPath[0]:
                sqlQuery = u"""
                    update pesstoObjects set ps1_map = 0 where transientBucketId = %(transientBucketId)s
                """ % locals()
                writequery(log=self.log, sqlQuery=sqlQuery, dbConn=self.dbConn)
                continue

            source = colorPath[0]
            destination = downloadPath + "/ps1_map_color.jpeg"
            try:
                os.rename(source, destination)

                # ANNOTATE AND SCALE THE DOWNLOADED COLOR IMAGE
                myimage = image(log=self.log,
                                settings=self.settings,
                                imagePath=destination,
                                arcsecSize=60,
                                crosshairs=True,
                                transient=False,
                                scale=True,
                                invert=False,
                                greyscale=False).get()

                # UPDATE DATABASE FLAG
                sqlQuery = u"""
                    update pesstoObjects set ps1_map = 1 where transientBucketId = %(transientBucketId)s
                """ % locals()

                writequery(log=self.log, sqlQuery=sqlQuery, dbConn=self.dbConn)
            except:
                self.log.warning(
                    "Could not process the image %(destination)s" % locals())

        self.log.debug('completed the ``get`` method')
        return None
Example #25
def main(arguments=None):
    """
    *The main function used when `cl_utils.py` is run as a single script from the cl, or when installed as a cl command*
    """
    # setup the command-line util settings
    su = tools(arguments=arguments,
               docString=__doc__,
               logLevel="WARNING",
               options_first=False,
               projectName="marshallEngine",
               defaultSettingsFile=True)
    arguments, settings, log, dbConn = su.setup()

    # tab completion for raw_input
    readline.set_completer_delims(' \t\n;')
    readline.parse_and_bind("tab: complete")
    readline.set_completer(tab_complete)

    # UNPACK REMAINING CL ARGUMENTS INTO A DICTIONARY OF VARIABLE NAMES
    a = {}
    for arg, val in list(arguments.items()):
        if arg[0] == "-":
            varname = arg.replace("-", "") + "Flag"
        else:
            varname = arg.replace("<", "").replace(">", "")
        a[varname] = val
        if arg == "--dbConn":
            dbConn = val
            a["dbConn"] = val
        log.debug('%s = %s' % (
            varname,
            val,
        ))

    ## START LOGGING ##
    startTime = times.get_now_sql_datetime()
    log.info('--- STARTING TO RUN THE cl_utils.py AT %s' % (startTime, ))

    init = a["init"]
    clean = a["clean"]
    iimport = a["import"]
    lightcurve = a["lightcurve"]
    transientBucketId = a["transientBucketId"]
    survey = a["survey"]
    withInLastDay = a["withInLastDay"]
    settingsFlag = a["settingsFlag"]

    # set options interactively if user requests
    if "interactiveFlag" in a and a["interactiveFlag"]:

        # load previous settings
        moduleDirectory = os.path.dirname(__file__) + "/resources"
        pathToPickleFile = "%(moduleDirectory)s/previousSettings.p" % locals()
        try:
            with open(pathToPickleFile):
                pass
            previousSettingsExist = True
        except:
            previousSettingsExist = False
        previousSettings = {}
        if previousSettingsExist:
            previousSettings = pickle.load(open(pathToPickleFile, "rb"))

        # x-raw-input
        # x-boolean-raw-input
        # x-raw-input-with-default-value-from-previous-settings

        # save the most recently used requests
        pickleMeObjects = []
        pickleMe = {}
        theseLocals = locals()
        for k in pickleMeObjects:
            pickleMe[k] = theseLocals[k]
        pickle.dump(pickleMe, open(pathToPickleFile, "wb"))

    if a["init"]:
        from os.path import expanduser
        home = expanduser("~")
        filepath = home + "/.config/marshallEngine/marshallEngine.yaml"
        try:
            cmd = """open %(filepath)s""" % locals()
            p = Popen(cmd, stdout=PIPE, stderr=PIPE, shell=True)
        except:
            pass
        try:
            cmd = """start %(filepath)s""" % locals()
            p = Popen(cmd, stdout=PIPE, stderr=PIPE, shell=True)
        except:
            pass
        return

    # DEFAULT VALUES
    if not withInLastDay:
        withInLastDay = 30

    # CALL FUNCTIONS/OBJECTS
    if clean:
        # RESCUE ORPHANED TRANSIENTS - NO MASTER ID FLAG
        print("rescuing orphaned transients")
        from fundamentals.mysql import writequery

        procedureNames = [
            "update_transients_with_no_masteridflag",
            "insert_new_transients_into_transientbucketsummaries",
            "resurrect_objects", "update_sherlock_xmatch_counts",
            "update_inbox_auto_archiver", "update_transient_akas"
        ]

        # CALL EACH PROCEDURE
        for p in procedureNames:
            sqlQuery = "CALL `%(p)s`();" % locals()
            writequery(
                log=log,
                sqlQuery=sqlQuery,
                dbConn=dbConn,
            )

        # UPDATE THE TRANSIENT BUCKET SUMMARY TABLE IN THE MARSHALL DATABASE
        from marshallEngine.housekeeping import update_transient_summaries
        updater = update_transient_summaries(log=log,
                                             settings=settings,
                                             dbConn=dbConn).update()

    if iimport:
        if survey.lower() == "panstarrs":
            from marshallEngine.feeders.panstarrs.data import data
            from marshallEngine.feeders.panstarrs import images
        if survey.lower() == "atlas":
            from marshallEngine.feeders.atlas.data import data
            from marshallEngine.feeders.atlas import images
        if survey.lower() == "useradded":
            from marshallEngine.feeders.useradded.data import data
            from marshallEngine.feeders.useradded import images
        if survey.lower() == "tns":
            from marshallEngine.feeders.tns.data import data
            from marshallEngine.feeders.tns import images
        if survey.lower() == "ztf":
            from marshallEngine.feeders.ztf.data import data
            from marshallEngine.feeders.ztf import images
        ingester = data(log=log, settings=settings,
                        dbConn=dbConn).ingest(withinLastDays=withInLastDay)
        cacher = images(log=log, settings=settings,
                        dbConn=dbConn).cache(limit=3000)

        from marshallEngine.services import panstarrs_location_stamps
        ps_stamp = panstarrs_location_stamps(log=log,
                                             settings=settings,
                                             dbConn=dbConn).get()

    if lightcurve:
        from marshallEngine.lightcurves import marshall_lightcurves
        lc = marshall_lightcurves(log=log,
                                  dbConn=dbConn,
                                  settings=settings,
                                  transientBucketIds=transientBucketId)
        filepath = lc.plot()
        print(
            "The lightcurve plot for transient %(transientBucketId)s can be found here: %(filepath)s"
            % locals())

    if "dbConn" in locals() and dbConn:
        dbConn.commit()
        dbConn.close()
    ## FINISH LOGGING ##
    endTime = times.get_now_sql_datetime()
    runningTime = times.calculate_time_difference(startTime, endTime)
    log.info(
        '-- FINISHED ATTEMPT TO RUN THE cl_utils.py AT %s (RUNTIME: %s) --' % (
            endTime,
            runningTime,
        ))

    return
Example #26
# Recursively create missing directories
if not os.path.exists(pathToOutputDir):
    os.makedirs(pathToOutputDir)

# SETUP ALL DATABASE CONNECTIONS
from sherlock import database
db = database(log=log, settings=settings)
dbConns, dbVersions = db.connect()
transientsDbConn = dbConns["transients"]
cataloguesDbConn = dbConns["catalogues"]

try:
    from fundamentals.mysql import writequery
    sqlQuery = """drop table IF EXISTS tcs_cat_ned_stream;""" % locals()
    writequery(log=log, sqlQuery=sqlQuery, dbConn=cataloguesDbConn)
except:
    pass


class test_ned(unittest.TestCase):
    def test_ned_function(self):
        coordinateList = ["23.2323 -43.23434"]
        from sherlock.imports import ned
        catalogue = ned(log=log,
                        settings=settings,
                        coordinateList=coordinateList,
                        radiusArcsec=300)
        catalogue.ingest()

    def test_ned_function_exception(self):
Example #27
    def ingest(self, withinLastDays):
        """*Ingest the data into the marshall feeder survey table*

        **Key Arguments**

        - ``withinLastDays`` -- within the last number of days. *Default: 50*

        """
        self.log.debug('starting the ``ingest`` method')

        allLists = []
        csvDicts = self.get_csv_data(
            url=self.settings["panstarrs urls"]["ps13pi"]["summary csv"],
            user=self.settings["credentials"]["ps13pi"]["username"],
            pwd=self.settings["credentials"]["ps13pi"]["password"])
        allLists.extend(
            self._clean_data_pre_ingest(surveyName="ps13pi",
                                        withinLastDays=withinLastDays))

        csvDicts = self.get_csv_data(
            url=self.settings["panstarrs urls"]["ps13pi"]["recurrence csv"],
            user=self.settings["credentials"]["ps13pi"]["username"],
            pwd=self.settings["credentials"]["ps13pi"]["password"])
        allLists.extend(
            self._clean_data_pre_ingest(surveyName="ps13pi",
                                        withinLastDays=withinLastDays))

        csvDicts = self.get_csv_data(
            url=self.settings["panstarrs urls"]["ps23pi"]["summary csv"],
            user=self.settings["credentials"]["ps23pi"]["username"],
            pwd=self.settings["credentials"]["ps23pi"]["password"])
        allLists.extend(
            self._clean_data_pre_ingest(surveyName="ps23pi",
                                        withinLastDays=withinLastDays))

        csvDicts = self.get_csv_data(
            url=self.settings["panstarrs urls"]["ps23pi"]["recurrence csv"],
            user=self.settings["credentials"]["ps23pi"]["username"],
            pwd=self.settings["credentials"]["ps23pi"]["password"])
        allLists.extend(
            self._clean_data_pre_ingest(surveyName="ps23pi",
                                        withinLastDays=withinLastDays))

        csvDicts = self.get_csv_data(
            url=self.settings["panstarrs urls"]["pso3"]["summary csv"],
            user=self.settings["credentials"]["pso3"]["username"],
            pwd=self.settings["credentials"]["pso3"]["password"])
        allLists.extend(
            self._clean_data_pre_ingest(surveyName="pso3",
                                        withinLastDays=withinLastDays))
        csvDicts = self.get_csv_data(
            url=self.settings["panstarrs urls"]["pso3"]["recurrence csv"],
            user=self.settings["credentials"]["pso3"]["username"],
            pwd=self.settings["credentials"]["pso3"]["password"])
        allLists.extend(
            self._clean_data_pre_ingest(surveyName="pso3",
                                        withinLastDays=withinLastDays))

        self.dictList = allLists
        self._import_to_feeder_survey_table()

        self.insert_into_transientBucket()

        # FIX ODD PANSTARRS COORDINATES
        sqlQuery = """update transientBucket set raDeg = raDeg+360.0 where raDeg  < 0;""" % locals(
        )
        writequery(log=self.log, sqlQuery=sqlQuery, dbConn=self.dbConn)

        # CLEAN UP TASKS TO MAKE THE TICKET UPDATE
        self.clean_up()

        self.log.debug('completed the ``ingest`` method')
        return None
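The six blocks above differ only in which survey and which CSV (summary or recurrence) is fetched, so the same work can be expressed as a loop over those two parameters. A hedged sketch of that refactor; it assumes exactly the same `self.settings` layout and the same `get_csv_data`/`_clean_data_pre_ingest` helpers used above:

```python
# Sketch only: a possible condensed form of the method body above.
allLists = []
for surveyName in ("ps13pi", "ps23pi", "pso3"):
    for csvType in ("summary csv", "recurrence csv"):
        csvDicts = self.get_csv_data(
            url=self.settings["panstarrs urls"][surveyName][csvType],
            user=self.settings["credentials"][surveyName]["username"],
            pwd=self.settings["credentials"][surveyName]["password"])
        allLists.extend(
            self._clean_data_pre_ingest(surveyName=surveyName,
                                        withinLastDays=withinLastDays))
```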
Example #28
def _insert_single_batch_into_database(batchIndex, log, dbTableName,
                                       uniqueKeyList, dateModified, replace,
                                       batchSize, reDatetime):
    """*insert a single batch of dictionaries into a database table*

    **Key Arguments:**
        - ``batchIndex`` -- the index of the batch to insert (batches are read from the module-level ``sharedList``)
        - ``log`` -- logger
        - ``dbTableName`` -- the name of the database table to insert the batch into
        - ``uniqueKeyList`` -- a list of column names used to identify unique rows
        - ``dateModified`` -- add/update a `dateLastModified` timestamp on the rows?
        - ``replace`` -- update rows on duplicate keys rather than ignoring them?
        - ``batchSize`` -- the number of rows inserted per batch
        - ``reDatetime`` -- a pre-compiled datetime regex (passed in for performance)

    **Return:**
        - None

    **Usage:**
        .. todo::

            add usage info
            create a sublime snippet for usage

        .. code-block:: python 

            usage code            
    """
    log.info('starting the ``_insert_single_batch_into_database`` function')

    global totalCount
    global globalDbConn
    global sharedList

    batch = sharedList[batchIndex]

    reDate = reDatetime

    if isinstance(globalDbConn, dict):
        # SETUP ALL DATABASE CONNECTIONS

        dbConn = database(log=log, dbSettings=globalDbConn,
                          autocommit=False).connect()
    else:
        dbConn = globalDbConn

    count = batch[1]
    if count > totalCount:
        count = totalCount
    ltotalCount = totalCount

    inserted = False
    while inserted == False:

        # NOTE: BOTH BRANCHES CURRENTLY RESOLVE TO THE SAME VERB; WHEN
        # `replace` IS TRUE IT IS THE ON DUPLICATE KEY UPDATE CLAUSE BUILT
        # BELOW THAT REFRESHES EXISTING ROWS
        if not replace:
            insertVerb = "INSERT IGNORE"
        else:
            insertVerb = "INSERT IGNORE"

        uniKeys = set().union(*(d.keys() for d in batch[0]))
        tmp = []
        tmp[:] = [m.replace(" ", "_").replace("-", "_") for m in uniKeys]
        uniKeys = tmp

        myKeys = '`,`'.join(uniKeys)
        vals = [
            tuple([
                None if d[k] in ["None", None] else str(d[k]) for k in uniKeys
            ]) for d in batch[0]
        ]
        valueString = ("%s, " * len(vals[0]))[:-2]
        insertCommand = insertVerb + """ INTO `""" + dbTableName + \
            """` (`""" + myKeys + """`, dateCreated) VALUES (""" + \
            valueString + """, NOW())"""

        dup = ""
        if replace:
            dup = " ON DUPLICATE KEY UPDATE "
            for k in uniKeys:
                dup = """%(dup)s %(k)s=values(%(k)s),""" % locals()
            dup = """%(dup)s updated=1, dateLastModified=NOW()""" % locals()

        insertCommand = insertCommand + dup

        insertCommand = insertCommand.replace('\\""', '\\" "')
        insertCommand = insertCommand.replace('""', "null")
        insertCommand = insertCommand.replace('"None"', 'null')

        message = ""
        # log.debug('adding new data to the %s table; query: %s' %
        # (dbTableName, addValue))
        try:
            message = writequery(log=log,
                                 sqlQuery=insertCommand,
                                 dbConn=dbConn,
                                 Force=True,
                                 manyValueList=vals)
        except:
            theseInserts = []
            for aDict in batch[0]:

                insertCommand, valueTuple = convert_dictionary_to_mysql_table(
                    dbConn=dbConn,
                    log=log,
                    dictionary=aDict,
                    dbTableName=dbTableName,
                    uniqueKeyList=uniqueKeyList,
                    dateModified=dateModified,
                    returnInsertOnly=True,
                    replace=replace,
                    reDatetime=reDate,
                    skipChecks=True)
                theseInserts.append(valueTuple)

            message = ""
            # log.debug('adding new data to the %s table; query: %s' %
            # (dbTableName, addValue))
            message = writequery(log=log,
                                 sqlQuery=insertCommand,
                                 dbConn=dbConn,
                                 Force=True,
                                 manyValueList=theseInserts)

        if message == "unknown column":
            for aDict in batch[0]:
                convert_dictionary_to_mysql_table(dbConn=dbConn,
                                                  log=log,
                                                  dictionary=aDict,
                                                  dbTableName=dbTableName,
                                                  uniqueKeyList=uniqueKeyList,
                                                  dateModified=dateModified,
                                                  reDatetime=reDate,
                                                  replace=replace)
        else:
            inserted = True

        dbConn.commit()

    log.info('completed the ``_insert_single_batch_into_database`` function')
    return "None"
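When `replace=True` above, the INSERT is extended with an `ON DUPLICATE KEY UPDATE` clause listing every column, so rows that collide on a unique key are refreshed rather than skipped. A small standalone sketch of how such a clause can be assembled (the function and column names are illustrative, not part of the fundamentals package):

```python
# Sketch only: builds the tail of an
# "INSERT ... ON DUPLICATE KEY UPDATE col=values(col), ..." statement.
def build_duplicate_key_clause(columns):
    parts = ["%s=values(%s)" % (c, c) for c in columns]
    return (" ON DUPLICATE KEY UPDATE " + ", ".join(parts) +
            ", updated=1, dateLastModified=NOW()")

print(build_duplicate_key_clause(["raDeg", "decDeg", "mag"]))
# -> " ON DUPLICATE KEY UPDATE raDeg=values(raDeg), decDeg=values(decDeg),
#     mag=values(mag), updated=1, dateLastModified=NOW()"
```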
Example #29
    pass
# COPY INPUT TO OUTPUT DIR
shutil.copytree(pathToInputDir, pathToOutputDir)

# Recursively create missing directories
if not os.path.exists(pathToOutputDir):
    os.makedirs(pathToOutputDir)

# xt-setup-unit-testing-files-and-folders

try:
    from fundamentals.mysql import writequery
    sqlQuery = """drop table IF EXISTS tcs_cat_ned_d_v1_0;""" % locals()
    writequery(
        log=log,
        sqlQuery=sqlQuery,
        dbConn=dbConn
    )
except:
    pass


class test_ned_d(unittest.TestCase):

    def test_ned_d_function(self):

        from sherlock.imports import ned_d
        catalogue = ned_d(
            log=log,
            settings=settings,
            pathToDataFile=pathToInputDir + "/ned_d_test.csv",
    def label_pointings_with_gw_ids(
            self):
        """
        *Attempt to label the PS1 pointing with the GW IDs*

        The GW metadata used to associate PS1 pointings is taken from the settings file

        **Return:**
            - None

         **Usage:**

            .. code-block:: python

                # ATTEMPT TO LABEL PS1 POINTINGS IN DATABASE WITH A GW ID
                from breaker import update_ps1_atlas_footprint_tables
                dbUpdater = update_ps1_atlas_footprint_tables(
                    log=log,
                    settings=settings
                )
                dbUpdater.label_pointings_with_gw_ids()
        """
        self.log.debug('starting the ``label_pointings_with_gw_ids`` method')

        # WAVE METADATA FOUND IN SETTINGS FILE
        for wave in self.settings["gravitational waves"]:

            # UNPACK THE PLOT PARAMETERS FROM THE SETTINGS FILE
            centralCoordinate = self.settings["gravitational waves"][
                wave]["plot"]["centralCoordinate"]
            raRange = float(self.settings["gravitational waves"][
                wave]["plot"]["raRange"])
            decRange = float(self.settings["gravitational waves"][
                wave]["plot"]["decRange"])

            raMax = (centralCoordinate[0] + raRange / 2.) + 5.
            raMin = (centralCoordinate[0] - raRange / 2.) - 5.
            decMax = (centralCoordinate[1] + decRange / 2.) + 5.
            decMin = (centralCoordinate[1] - decRange / 2.) - 5.

            mjdLower = self.settings["gravitational waves"][
                wave]["mjd"] - 21.
            mjdUpper = self.settings["gravitational waves"][
                wave]["mjd"] + 31

            if raMin > 0. and raMax < 360.:
                raWhere = """(raDeg > %(raMin)s and raDeg < %(raMax)s)""" % locals(
                )
            elif raMin < 0.:
                raMin2 = raMin + 360.
                raWhere = """((raDeg > 0. and raDeg < %(raMax)s) or raDeg > %(raMin2)s)""" % locals(
                )
            elif raMax > 360.:
                raMax2 = raMax - 360.
                raWhere = """((raDeg > %(raMin)s and raDeg < 360.) or raDeg < %(raMax2)s)""" % locals(
                )

            decWhere = """(decDeg > %(decMin)s and  decDeg < %(decMax)s)""" % locals(
            )

            mjdWhere = "(mjd>%(mjdLower)s and mjd<%(mjdUpper)s)" % locals()

            sqlQuery = u"""
                update ps1_pointings set gw_id = "%(wave)s" where %(raWhere)s and %(decWhere)s and %(mjdWhere)s and gw_id is null
            """ % locals()
            writequery(
                log=self.log,
                sqlQuery=sqlQuery,
                dbConn=self.ligo_virgo_wavesDbConn,
            )
            sqlQuery = u"""
                update ps1_pointings set gw_id = CONCAT(gw_id, " %(wave)s") where %(raWhere)s and %(decWhere)s and %(mjdWhere)s and gw_id is not null and gw_id not like "%%%(wave)s%%";
            """ % locals()
            writequery(
                log=self.log,
                sqlQuery=sqlQuery,
                dbConn=self.ligo_virgo_wavesDbConn,
            )

            sqlQuery = u"""
                update atlas_pointings set gw_id = "%(wave)s" where %(raWhere)s and %(decWhere)s and %(mjdWhere)s and gw_id is null
            """ % locals()
            writequery(
                log=self.log,
                sqlQuery=sqlQuery,
                dbConn=self.ligo_virgo_wavesDbConn,
            )
            sqlQuery = u"""
                update atlas_pointings set gw_id = CONCAT(gw_id, " %(wave)s") where %(raWhere)s and %(decWhere)s and %(mjdWhere)s and gw_id is not null and gw_id not like "%%%(wave)s%%";
            """ % locals()
            writequery(
                log=self.log,
                sqlQuery=sqlQuery,
                dbConn=self.ligo_virgo_wavesDbConn,
            )

            mjdWhere = mjdWhere.replace("mjd", "mjd_registered")
            sqlQuery = u"""
                update ps1_nightlogs set gw_id = "%(wave)s" where %(raWhere)s and %(decWhere)s and %(mjdWhere)s and gw_id is null and type = "OBJECT"
            """ % locals()
            writequery(
                log=self.log,
                sqlQuery=sqlQuery,
                dbConn=self.ligo_virgo_wavesDbConn,
            )
            sqlQuery = u"""
                update ps1_nightlogs set gw_id = CONCAT(gw_id, " %(wave)s") where %(raWhere)s and %(decWhere)s and %(mjdWhere)s and gw_id is not null and type = "OBJECT" and gw_id not like "%%%(wave)s%%";
            """ % locals()
            writequery(
                log=self.log,
                sqlQuery=sqlQuery,
                dbConn=self.ligo_virgo_wavesDbConn,
            )

        sqlQuery = u"""
            select count(*) as count from ps1_pointings where gw_id is null;
        """ % locals()

        count = readquery(
            log=self.log,
            sqlQuery=sqlQuery,
            dbConn=self.ligo_virgo_wavesDbConn,
            quiet=False
        )[0]["count"]

        print("PS1 pointings labelled with their associated GW id")

        if count == 0:
            print("    Note all pointings have been labelled with GW ID")
        else:
            print("    %(count)s pointings remain unlabelled with a GW ID" % locals())

        self.log.debug('completed the ``label_pointings_with_gw_ids`` method')
        return None
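The RA window above has to cope with wrapping through 0/360 deg, which is why three different WHERE fragments are built. The same logic as a compact standalone helper (a hedged illustration only, not part of the breaker package):

```python
# Sketch only: a standalone helper mirroring the RA wrap-around logic above.
def ra_where_clause(raMin, raMax):
    """Return a SQL WHERE fragment selecting raDeg within [raMin, raMax],
    handling windows that spill below 0 deg or above 360 deg."""
    if raMin >= 0. and raMax <= 360.:
        return "(raDeg > %s and raDeg < %s)" % (raMin, raMax)
    elif raMin < 0.:
        # e.g. [-5, 20] becomes [355, 360] U [0, 20]
        return "((raDeg > 0. and raDeg < %s) or raDeg > %s)" % (raMax, raMin + 360.)
    else:
        # raMax > 360., e.g. [350, 365] becomes [350, 360] U [0, 5]
        return "((raDeg > %s and raDeg < 360.) or raDeg < %s)" % (raMin, raMax - 360.)

print(ra_where_clause(350., 365.))
```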
Example #31
    def _update_sdss_coverage(
            self):
        """ update sdss coverage

        .. todo ::

            - update key arguments values and definitions with defaults
            - update return values and definitions
            - update usage examples and text
            - update docstring text
            - check sublime snippet exists
            - clip any useful text to docs mindmap
            - regenerate the docs and check rendering of this docstring
        """
        self.log.debug('starting the ``_update_sdss_coverage`` method')

        tableName = self.dbTableName

        # SELECT THE LOCATIONS NEEDING TO BE CHECKED
        sqlQuery = u"""
            select primary_ned_id, primaryID, raDeg, decDeg, sdss_coverage from %(tableName)s where sdss_coverage is null and master_row = 1 and in_ned = 1 order by dist_mpc;
        """ % locals()
        rows = readquery(
            log=self.log,
            sqlQuery=sqlQuery,
            dbConn=self.cataloguesDbConn,
            quiet=False
        )

        totalCount = len(rows)
        count = 0
        for row in rows:
            count += 1
            if count > 1:
                # Cursor up three lines and clear
                sys.stdout.write("\x1b[1A\x1b[2K")
                sys.stdout.write("\x1b[1A\x1b[2K")
                sys.stdout.write("\x1b[1A\x1b[2K")

            if count > totalCount:
                count = totalCount
            percent = (float(count) / float(totalCount)) * 100.

            primaryID = row["primaryID"]
            raDeg = float(row["raDeg"])
            decDeg = float(row["decDeg"])
            primary_ned_id = row["primary_ned_id"]

            # SDSS CAN ONLY ACCEPT 60 QUERIES/MIN
            time.sleep(1.1)
            print("%(count)s / %(totalCount)s (%(percent)1.1f%%) NED galaxies checked for SDSS coverage" % locals())
            print("NED NAME: %s" % (primary_ned_id,))

            # covered = True | False | 999 (i.e. not sure)
            sdss_coverage = check_coverage(
                log=self.log,
                ra=raDeg,
                dec=decDeg
            ).get()

            if sdss_coverage == 999:
                sdss_coverage_flag = "null"
            elif sdss_coverage == True:
                sdss_coverage_flag = 1
            elif sdss_coverage == False:
                sdss_coverage_flag = 0
            else:
                self.log.error('could not get sdss coverage')
                sys.exit(0)

            # UPDATE THE DATABASE FLAG
            sqlQuery = u"""
                update %(tableName)s set sdss_coverage = %(sdss_coverage_flag)s where primaryID = %(primaryID)s
            """ % locals()
            writequery(
                log=self.log,
                sqlQuery=sqlQuery,
                dbConn=self.cataloguesDbConn,
            )

        self.log.debug('completed the ``_update_sdss_coverage`` method')
        return None
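The loop above throttles itself to roughly one SDSS query per second (the service accepts about 60 queries per minute) and rewrites its progress lines in place with ANSI cursor-up escapes. A tiny self-contained sketch of that progress-rewrite trick:

```python
# Sketch only: demonstrates the cursor-up/clear-line rewrite and the simple
# sleep-based rate limit used in the method above.
import sys
import time

total = 5
for count in range(1, total + 1):
    if count > 1:
        # MOVE THE CURSOR UP ONE LINE AND CLEAR IT BEFORE REPRINTING
        sys.stdout.write("\x1b[1A\x1b[2K")
    print("%s / %s rows checked" % (count, total))
    time.sleep(1.1)  # STAY UNDER A ~60 REQUESTS/MINUTE RATE LIMIT
```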
    def update_ned_database_table(
            self):
        """
        *Use Sherlock & Neddy to query NED and update the catalogues database for previously unseen/stale PS1 footprint areas*

        **Return:**
            - None

        **Usage:**

            .. code-block:: python

                # UPDATE THE NED STREAM FOR NEW PS1 FOOTPRINTS
                from breaker import update_ps1_atlas_footprint_tables
                dbUpdater = update_ps1_atlas_footprint_tables(
                    log=log,
                    settings=settings
                )
                dbUpdater.update_ned_database_table()
        """
        self.log.debug('starting the ``update_ned_database_table`` method')

        from sherlock.update_ned_stream import update_ned_stream

        numDisksToConesearch = 100
        rowCount = 100

        while rowCount > 0:

            sqlQuery = u"""
                select primaryId, raDeg as "ra", decDeg as "dec", htm16ID from ps1_pointings_subdisks where nedQueried = 0 limit %(numDisksToConesearch)s
            """ % locals()
            rows = readquery(
                log=self.log,
                sqlQuery=sqlQuery,
                dbConn=self.ligo_virgo_wavesDbConn,
                quiet=False
            )
            rowCount = len(rows)
            ids = []
            ids[:] = [str(row["primaryId"]) for row in rows]
            ids = ",".join(ids)

            if rowCount > 0:
                print("Selecting the next %(rowCount)s subdisk areas to conesearch against NED from the `ps1_pointings_subdisks` table" % locals())
            else:
                print("NED stream is up-to-date, no queries required")

            update_ned_stream(
                log=self.log,
                cataloguesDbConn=self.cataloguesDbConn,
                settings=self.settings,
                transientsMetadataList=rows
            ).get()

            if len(ids):
                sqlQuery = u"""
                    update ps1_pointings_subdisks set nedQueried = 1 where primaryId in (%(ids)s)
                """ % locals()
                writequery(
                    log=self.log,
                    sqlQuery=sqlQuery,
                    dbConn=self.ligo_virgo_wavesDbConn,
                )

            sqlQuery = u"""
                select count(*) as count from ps1_pointings_subdisks where nedQueried = 0
            """ % locals()
            count = readquery(
                log=self.log,
                sqlQuery=sqlQuery,
                dbConn=self.ligo_virgo_wavesDbConn,
                quiet=False
            )
            count = count[0]["count"]

            if rowCount > 0:
                print("NED stream updated for %(rowCount)s PS1 pointing sub-disks (%(count)s to go)" % locals())
                print("-----\n\n")

        self.log.debug('completed the ``update_ned_database_table`` method')
        return None
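The method above drains its work queue in fixed-size chunks, looping until the `nedQueried = 0` selection comes back empty and flagging each chunk once it has been processed. A stripped-down sketch of that drain-the-queue pattern, with a hypothetical queue table and assuming `log` and `dbConn` exist:

```python
# Sketch only: `my_queue_table` and its columns are hypothetical; `log` and
# `dbConn` are assumed to exist.
from fundamentals.mysql import readquery, writequery

chunkSize = 100
rowCount = chunkSize
while rowCount > 0:
    rows = readquery(
        log=log,
        sqlQuery="select primaryId from my_queue_table where processed = 0 limit %s" % (chunkSize,),
        dbConn=dbConn,
        quiet=False)
    rowCount = len(rows)
    if not rowCount:
        break

    # ... do the per-chunk work here ...

    ids = ",".join([str(r["primaryId"]) for r in rows])
    writequery(
        log=log,
        sqlQuery="update my_queue_table set processed = 1 where primaryId in (%s)" % (ids,),
        dbConn=dbConn)
```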
Example #33
    def _update_ned_query_history(self):
        """*Update the database helper table to give details of the ned cone searches performed*

        *Usage:*

            .. code-block:: python

                stream._update_ned_query_history()
        """
        self.log.debug('starting the ``_update_ned_query_history`` method')

        myPid = self.myPid

        # ASTROCALC UNIT CONVERTER OBJECT
        converter = unit_conversion(log=self.log)

        # UPDATE THE DATABASE HELPER TABLE TO GIVE DETAILS OF THE NED CONE
        # SEARCHES PERFORMED
        dataList = []
        for i, coord in enumerate(self.coordinateList):
            if isinstance(coord, str):
                ra = coord.split(" ")[0]
                dec = coord.split(" ")[1]
            elif isinstance(coord, tuple) or isinstance(coord, list):
                ra = coord[0]
                dec = coord[1]

            dataList.append({
                "raDeg": ra,
                "decDeg": dec,
                "arcsecRadius": self.radiusArcsec
            })

        if len(dataList) == 0:
            return None

        # CREATE TABLE IF NOT EXIST
        createStatement = """CREATE TABLE IF NOT EXISTS `tcs_helper_ned_query_history` (
  `primaryId` bigint(20) NOT NULL AUTO_INCREMENT,
  `raDeg` double DEFAULT NULL,
  `decDeg` double DEFAULT NULL,
  `dateCreated` datetime DEFAULT CURRENT_TIMESTAMP,
  `dateLastModified` datetime DEFAULT CURRENT_TIMESTAMP,
  `updated` varchar(45) DEFAULT '0',
  `arcsecRadius` int(11) DEFAULT NULL,
  `dateQueried` datetime DEFAULT CURRENT_TIMESTAMP,
  `htm16ID` bigint(20) DEFAULT NULL,
  `htm13ID` int(11) DEFAULT NULL,
  `htm10ID` int(11) DEFAULT NULL,
  PRIMARY KEY (`primaryId`),
  KEY `idx_htm16ID` (`htm16ID`),
  KEY `dateQueried` (`dateQueried`),
  KEY `dateHtm16` (`dateQueried`,`htm16ID`),
  KEY `idx_htm10ID` (`htm10ID`),
  KEY `idx_htm13ID` (`htm13ID`)
) ENGINE=MyISAM AUTO_INCREMENT=0 DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci;
        """
        writequery(log=self.log,
                   sqlQuery=createStatement,
                   dbConn=self.cataloguesDbConn)

        # USE dbSettings TO ACTIVATE MULTIPROCESSING
        insert_list_of_dictionaries_into_database_tables(
            dbConn=self.cataloguesDbConn,
            log=self.log,
            dictList=dataList,
            dbTableName="tcs_helper_ned_query_history",
            uniqueKeyList=[],
            dateModified=True,
            batchSize=10000,
            replace=True,
            dbSettings=self.settings["database settings"]["static catalogues"])

        # INDEX THE TABLE FOR LATER SEARCHES
        add_htm_ids_to_mysql_database_table(
            raColName="raDeg",
            declColName="decDeg",
            tableName="tcs_helper_ned_query_history",
            dbConn=self.cataloguesDbConn,
            log=self.log,
            primaryIdColumnName="primaryId")

        self.log.debug('completed the ``_update_ned_query_history`` method')
        return None
Example #34
    def add_data_to_database_table(self, dictList, createStatement=False):
        """*Import data in the list of dictionaries in the requested database table*

        Also adds HTMIDs and updates the sherlock-catalogue database helper table with the time-stamp of when the imported catalogue was last updated

        **Key Arguments:**
            - ``dictList`` - a list of dictionaries containing all the rows in the catalogue to be imported
            - ``createStatement`` - the table's mysql create statement (used to generate table if it does not yet exist in database). Default *False*

        **Usage:**

            .. code-block:: python 

                self.add_data_to_database_table(
                    dictList=dictList,
                    createStatement=createStatement
                )

        .. todo ::

            - Write a checklist for creating a new sherlock database importer
        """
        self.log.debug('starting the ``add_data_to_database_table`` method')

        if len(dictList) == 0:
            return

        myPid = self.myPid
        dbTableName = self.dbTableName

        if createStatement:
            writequery(
                log=self.log,
                sqlQuery=createStatement,
                dbConn=self.cataloguesDbConn,
            )

        insert_list_of_dictionaries_into_database_tables(
            dbConn=self.cataloguesDbConn,
            log=self.log,
            dictList=dictList,
            dbTableName=dbTableName,
            uniqueKeyList=[],
            dateModified=True,
            dateCreated=True,
            batchSize=10000,
            replace=True,
            dbSettings=self.settings["database settings"]["static catalogues"])

        self._add_htmids_to_database_table()

        cleaner = database_cleaner(log=self.log, settings=self.settings)
        cleaner._update_tcs_helper_catalogue_tables_info_with_new_tables()

        self._update_database_helper_table()

        print """Now:

- [ ] edit the `%(dbTableName)s` row in the sherlock catalogues database adding relevant column mappings, catalogue version number etc
- [ ] retire any previous version of this catalogue in the database. Rename the catalogue table by appending `legacy_` and also change the name in the `tcs_helper_catalogue_tables_info` table
- [ ] duplicate views from the previous catalogue version to point towards the new version and then delete the old views
- [ ] run the command `sherlock clean [-s <pathToSettingsFile>]` to clean up helper tables
- [ ] switch out the old catalogue table/views in your sherlock search algorithms in the yaml settings files
- [ ] run a test batch of transients to make sure catalogue is installed as expected

""" % locals()

        self.log.debug('completed the ``add_data_to_database_table`` method')
        return None
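# Hypothetical example of the `createStatement` argument accepted by
# add_data_to_database_table above; any valid MySQL CREATE statement for the
# destination catalogue table will do (table and column names here are made up).
createStatement = """CREATE TABLE IF NOT EXISTS `tcs_cat_my_new_catalogue` (
  `primaryId` bigint(20) NOT NULL AUTO_INCREMENT,
  `raDeg` double DEFAULT NULL,
  `decDeg` double DEFAULT NULL,
  PRIMARY KEY (`primaryId`)
) ENGINE=MyISAM DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci;"""
# self.add_data_to_database_table(dictList=dictList, createStatement=createStatement)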
Example #35
0
    def insert_into_transientBucket(self,
                                    importUnmatched=True,
                                    updateTransientSummaries=True):
        """*insert objects/detections from the feeder survey table into the transientbucket*

        **Key Arguments**

        - ``importUnmatched`` -- import unmatched (new) transients into the marshall (not wanted in some circumstances)
        - ``updateTransientSummaries`` -- update the transient summaries and lightcurves? Can be True or False, or alternatively a specific transientBucketId


        This method aims to reduce crossmatching and load on the database by:

        1. automatically assign the transientbucket id to feeder survey detections where the object name is found in the transientbucket (no spatial crossmatch required). Copy matched feeder survey rows to the transientbucket.
        2. crossmatch remaining unique, unmatched sources in feeder survey with sources in the transientbucket. Add associated transientBucketIds to matched feeder survey sources. Copy matched feeder survey rows to the transientbucket.
        3. assign a new transientbucketid to any feeder survey source not matched in steps 1 & 2. Copy these unmatched feeder survey rows to the transientbucket as new transient detections.

        **Return**

        - None


        **Usage**

        ```python
        ingester.insert_into_transientBucket()
        ```

        """
        self.log.debug(
            'starting the ``insert_into_transientBucket`` method')

        fsTableName = self.fsTableName

        # 1. automatically assign the transientbucket id to feeder survey
        # detections where the object name is found in the transientbucket (no
        # spatial crossmatch required). Copy matched feeder survey rows to the
        # transientbucket.
        self._feeder_survey_transientbucket_name_match_and_import()

        # 2. crossmatch remaining unique, unmatched sources in feeder survey
        # with sources in the transientbucket. Add associated
        # transientBucketIds to matched feeder survey sources. Copy matched
        # feeder survey rows to the transientbucket.
        from HMpTy.mysql import add_htm_ids_to_mysql_database_table
        add_htm_ids_to_mysql_database_table(
            raColName="raDeg",
            declColName="decDeg",
            tableName="transientBucket",
            dbConn=self.dbConn,
            log=self.log,
            primaryIdColumnName="primaryKeyId",
            dbSettings=self.settings["database settings"])
        unmatched = self._feeder_survey_transientbucket_crossmatch()

        # 3. assign a new transientbucketid to any feeder survey source not
        # matched in steps 1 & 2. Copy these unmatched feeder survey rows to
        # the transientbucket as new transient detections.
        if importUnmatched:
            self._import_unmatched_feeder_survey_sources_to_transientbucket(
                unmatched)

        # UPDATE OBSERVATION DATES FROM MJDs
        sqlQuery = "call update_transientbucket_observation_dates()"
        writequery(log=self.log, sqlQuery=sqlQuery, dbConn=self.dbConn)

        # UPDATE THE TRANSIENT BUCKET SUMMARY TABLE IN THE MARSHALL DATABASE
        if updateTransientSummaries:
            if isinstance(updateTransientSummaries, int) and not isinstance(
                    updateTransientSummaries, bool):
                transientBucketId = updateTransientSummaries
            else:
                transientBucketId = False
            from marshallEngine.housekeeping import update_transient_summaries
            updater = update_transient_summaries(
                log=self.log,
                settings=self.settings,
                dbConn=self.dbConn,
                transientBucketId=transientBucketId)
            updater.update()

        self.log.debug(
            'completed the ``insert_into_transientBucket`` method')
        return None
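# Hypothetical usage sketch (construction of `ingester` not shown in this example):
# `updateTransientSummaries` can be a bool, or a specific transientBucketId as handled
# above; the id used here is made up.
ingester.insert_into_transientBucket()                               # match, import and update all summaries
ingester.insert_into_transientBucket(importUnmatched=False)          # skip creating brand-new transients
ingester.insert_into_transientBucket(updateTransientSummaries=1234)  # refresh a single transient's summary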
Example #36
0
    def convert_sqlite_to_mysql(self):
        """*copy the contents of the sqlite database into the mysql database*

        See class docstring for usage
        """
        from fundamentals.renderer import list_of_dictionaries
        from fundamentals.mysql import directory_script_runner
        self.log.debug('starting the ``convert_sqlite_to_mysql`` method')

        con = lite.connect(self.pathToSqlite)
        con.row_factory = lite.Row
        cur = con.cursor()

        # GET ALL TABLE NAMES
        cur.execute("SELECT name FROM sqlite_master WHERE type='table';")
        tables = cur.fetchall()

        createStatements = []
        inserts = []
        for table in tables:
            table = table['name']
            if table == "sqlite_sequence":
                continue

            # CREATE TABLE collection_books (folder_id, fingerprint, primary key(folder_id, fingerprint));
            # GENERATE THE MYSQL CREATE STATEMENTS FOR EACH TABLE
            cur.execute(
                "SELECT sql FROM sqlite_master WHERE name = '%(table)s';" %
                locals())
            createStatement = cur.fetchone()
            createStatement = createStatement[0].replace('"', '`') + ";"
            if "DEFAULT" not in createStatement:
                if "primary key(" in createStatement:
                    tmp = createStatement.split("primary key(")
                    tmp[0] = tmp[0].replace(",", " varchar(150) DEFAULT NULL,")
                    createStatement = ("primary key(").join(tmp)
                if "primary key," in createStatement:
                    tmp = createStatement.split("primary key,")
                    tmp[1] = tmp[1].replace(",", " varchar(150) DEFAULT NULL,")
                    tmp[1] = tmp[1].replace(");",
                                            " varchar(150) DEFAULT NULL);")
                    createStatement = ("primary key,").join(tmp)
            createStatement = createStatement.replace(
                "INTEGER PRIMARY KEY", "INTEGER AUTO_INCREMENT PRIMARY KEY")
            createStatement = createStatement.replace("AUTOINCREMENT",
                                                      "AUTO_INCREMENT")
            createStatement = createStatement.replace("DEFAULT 't'",
                                                      "DEFAULT '1'")
            createStatement = createStatement.replace("DEFAULT 'f'",
                                                      "DEFAULT '0'")
            createStatement = createStatement.replace(",'t'", ",'1'")
            createStatement = createStatement.replace(",'f'", ",'0'")
            if "CREATE TABLE `" in createStatement:
                createStatement = createStatement.replace(
                    "CREATE TABLE `",
                    "CREATE TABLE IF NOT EXISTS `" + self.tablePrefix)
            else:
                createStatement = createStatement.replace(
                    "CREATE TABLE ",
                    "CREATE TABLE IF NOT EXISTS " + self.tablePrefix)
            if ", primary key(" in createStatement:
                createStatement = createStatement.replace(
                    ", primary key(", """,
`dateCreated` datetime DEFAULT CURRENT_TIMESTAMP,
`dateLastModified` datetime DEFAULT CURRENT_TIMESTAMP,
`updated` tinyint(4) DEFAULT '0',
primary key(""")
            else:
                createStatement = createStatement.replace(
                    ");", """,
    `dateCreated` datetime DEFAULT CURRENT_TIMESTAMP,
    `dateLastModified` datetime DEFAULT CURRENT_TIMESTAMP,
    `updated` tinyint(4) DEFAULT '0');
                """)
            createStatement = createStatement.replace(
                " text primary key", " varchar(100) primary key")
            createStatement = createStatement.replace(
                "`EntryText` TEXT NOT NULL,", "`EntryText` TEXT,")
            createStatement = createStatement.replace(
                "`SelectionText` TEXT NOT NULL", "`SelectionText` TEXT")
            createStatement = createStatement.replace(
                "`Filename` INTEGER NOT NULL,", "`Filename` TEXT NOT NULL,")
            createStatement = createStatement.replace(
                "`SessionPartUUID` TEXT NOT NULL UNIQUE,",
                "`SessionPartUUID` VARCHAR(100) NOT NULL UNIQUE,")
            createStatement = createStatement.replace(
                "`Name` TEXT PRIMARY KEY NOT NULL",
                "`Name` VARCHAR(100) PRIMARY KEY NOT NULL")
            createStatement = createStatement.replace(" VARCHAR ",
                                                      " VARCHAR(100) ")
            createStatement = createStatement.replace(" VARCHAR,",
                                                      " VARCHAR(100),")

            if len(createStatement.lower().split("datecreated")) > 2:
                createStatement = createStatement.replace(
                    "`dateCreated` datetime DEFAULT CURRENT_TIMESTAMP,\n", "")

            # GRAB THE DATA TO ADD TO THE MYSQL DATABASE TABLES
            cur.execute("SELECT * from '%(table)s';" % locals())
            rows = cur.fetchall()

            allRows = []
            for row in rows:
                allRows.append(dict(row))

            # RECURSIVELY CREATE MISSING DIRECTORIES
            if not os.path.exists("/tmp/headjack/"):
                os.makedirs("/tmp/headjack/")

            writequery(
                log=self.log,
                sqlQuery=createStatement,
                dbConn=self.dbConn,
            )

            from fundamentals.mysql import insert_list_of_dictionaries_into_database_tables
            # USE dbSettings TO ACTIVATE MULTIPROCESSING
            insert_list_of_dictionaries_into_database_tables(
                dbConn=self.dbConn,
                log=self.log,
                dictList=allRows,
                dbTableName=self.tablePrefix + table,
                uniqueKeyList=[],
                dateModified=True,
                dateCreated=True,
                batchSize=10000,
                replace=True,
                dbSettings=self.settings["database settings"])

            # # DUMP THE DATA INTO A MYSQL DATABASE
            # dataSet = list_of_dictionaries(
            #     log=self.log,
            #     listOfDictionaries=allRows
            # )
            # originalList = dataSet.list
            # now = datetime.now()
            # now = now.strftime("%Y%m%dt%H%M%S%f.sql")
            # mysqlData = dataSet.mysql(
            # tableName=self.tablePrefix + table, filepath="/tmp/headjack/" +
            # now, createStatement=createStatement)

            # directory_script_runner(
            #     log=self.log,
            #     pathToScriptDirectory="/tmp/headjack/",
            #     databaseName=self.settings["database settings"]["db"],
            #     loginPath=self.settings["database settings"]["loginPath"],
            #     successRule="delete",
            #     failureRule="failed"
            # )

        con.close()

        self.log.debug('completed the ``convert_sqlite_to_mysql`` method')
        return None
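# A minimal illustration (using a made-up sqlite statement) of the kind of rewriting
# performed above before writequery() creates the MySQL table; the real method applies
# many more substitutions and also appends dateCreated/dateLastModified/updated columns
# and the table prefix.
sqliteCreate = 'CREATE TABLE "books" (id INTEGER PRIMARY KEY, title VARCHAR, pages INTEGER)'
mysqlCreate = sqliteCreate.replace('"', '`') + ";"
mysqlCreate = mysqlCreate.replace(
    "INTEGER PRIMARY KEY", "INTEGER AUTO_INCREMENT PRIMARY KEY")
mysqlCreate = mysqlCreate.replace(" VARCHAR,", " VARCHAR(100),")
mysqlCreate = mysqlCreate.replace("CREATE TABLE `", "CREATE TABLE IF NOT EXISTS `")
print(mysqlCreate)
# CREATE TABLE IF NOT EXISTS `books` (id INTEGER AUTO_INCREMENT PRIMARY KEY, title VARCHAR(100), pages INTEGER);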
Example #37
0
    def _import_unmatched_feeder_survey_sources_to_transientbucket(
            self, unmatched):
        """*assign a new transientbucketid to any feeder survey source not yet matched in steps. Copy these unmatched feeder survey rows to the transientbucket as new transient detections.*

        **Key Arguments**

        - ``unmatched`` -- the remaining unmatched feeder survey object names.

        """
        self.log.debug(
            'starting the ``_import_unmatched_feeder_survey_sources_to_transientbucket`` method'
        )

        if not len(unmatched):
            return None

        fsTableName = self.fsTableName
        fs_name = self.fs_name

        # READ MAX TRANSIENTBUCKET ID FROM TRANSIENTBUCKET
        sqlQuery = u"""
            select max(transientBucketId) as maxId from transientBucket
        """ % locals()
        rows = readquery(log=self.log, sqlQuery=sqlQuery, dbConn=self.dbConn)

        if not len(rows) or not rows[0]["maxId"]:
            maxId = 1
        else:
            maxId = rows[0]["maxId"] + 1

        # ADD NEW TRANSIENTBUCKETIDS TO FEEDER SURVEY TABLE
        updates = []
        newTransientBucketIds = []
        for u in unmatched:
            update = "update " + fsTableName + " set transientBucketId = " + \
                str(maxId) + " where " + fs_name + " = '" + str(u) + "';"
            updates.append(update)
            newTransientBucketIds.append(str(maxId))
            maxId += 1
        updates = ("\n").join(updates)
        writequery(log=self.log, sqlQuery=updates, dbConn=self.dbConn)

        # COPY FEEDER SURVEY ROWS TO TRANSIENTBUCKET
        self._feeder_survey_transientbucket_name_match_and_import()

        # SET THE MASTER ID FLAG FOR ALL NEW TRANSIENTS IN THE TRANSIENTBUCKET
        newTransientBucketIds = (",").join(newTransientBucketIds)
        sqlQuery = """UPDATE transientBucket t
                            JOIN
                        (SELECT 
                            transientBucketId, MIN(primaryKeyId) AS minpk
                        FROM
                            transientBucket
                        WHERE
                            transientBucketId IN (%(newTransientBucketIds)s)
                        GROUP BY transientBucketId) tmin ON t.primaryKeyId = tmin.minpk 
                    SET 
                        masterIDFlag = 1;""" % locals()
        writequery(log=self.log, sqlQuery=sqlQuery, dbConn=self.dbConn)

        self.log.debug(
            'completed the ``_import_unmatched_feeder_survey_sources_to_transientbucket`` method'
        )
        return None
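# Illustrative sketch (hypothetical table, column and object names) of how new
# transientBucketIds are handed out to the unmatched feeder-survey sources above:
# one UPDATE statement per object name, starting from the current maximum id + 1.
unmatched = ["ATLAS21abc", "ATLAS21abd"]
fsTableName = "fs_atlas_forced"      # hypothetical feeder-survey table
fs_name = "atlas_designation"        # hypothetical object-name column
maxId = 10001                        # would come from `select max(transientBucketId) ...`
updates = []
for u in unmatched:
    updates.append("update %s set transientBucketId = %s where %s = '%s';" %
                   (fsTableName, maxId, fs_name, u))
    maxId += 1
print("\n".join(updates))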
def convert_dictionary_to_mysql_table(
        log,
        dictionary,
        dbTableName,
        uniqueKeyList=[],
        dbConn=False,
        createHelperTables=False,
        dateModified=False,
        returnInsertOnly=False,
        replace=False,
        batchInserts=True,
        reDatetime=False,
        skipChecks=False,
        dateCreated=True):
    """convert dictionary to mysql table

    **Key Arguments:**
        - ``log`` -- logger
        - ``dictionary`` -- python dictionary
        - ``dbConn`` -- the db connection
        - ``dbTableName`` -- name of the table you wish to add the data to (or create if it does not exist)
        - ``uniqueKeyList`` -- a list of column names that need to be combined to create the primary key
        - ``createHelperTables`` -- create some helper tables with the main table, detailing original keywords etc
        - ``returnInsertOnly`` -- returns only the insert command (does not execute it)
        - ``dateModified`` -- add a modification date and updated flag to the mysql table
        - ``replace`` -- use replace instead of mysql insert statements (useful when updates are required)
        - ``batchInserts`` -- if returning insert statements return separate insert commands and value tuples
        - ``reDatetime`` -- compiled regular expression matching datetime (passing this in cuts down on execution time as it doesn't have to be recompiled every time during multiple iterations of ``convert_dictionary_to_mysql_table``)
        - ``skipChecks`` -- skip reliability checks. Less robust but a little faster.
        - ``dateCreated`` -- add a timestamp for dateCreated?

    **Return:**
        - ``returnInsertOnly`` -- the insert statement if requested

    **Usage:**

        To add a python dictionary to a database table, creating the table and/or columns if they don't yet exist:

        .. code-block:: python

            from fundamentals.mysql import convert_dictionary_to_mysql_table
            dictionary = {"a newKey": "cool", "and another": "super cool",
                      "uniquekey1": "cheese", "uniqueKey2": "burgers"}

            convert_dictionary_to_mysql_table(
                dbConn=dbConn,
                log=log,
                dictionary=dictionary,
                dbTableName="testing_table",
                uniqueKeyList=["uniquekey1", "uniqueKey2"],
                dateModified=False,
                returnInsertOnly=False,
                replace=True
            )

        Or just return the insert statement with a list of value tuples, i.e. do not execute the command on the database:

        .. code-block:: python

            insertCommand, valueTuple = convert_dictionary_to_mysql_table(
                dbConn=dbConn,
                log=log,
                dictionary=dictionary,
                dbTableName="testing_table",
                uniqueKeyList=["uniquekey1", "uniqueKey2"],
                dateModified=False,
                returnInsertOnly=True,
                replace=False,
                batchInserts=True
            )

            print insertCommand, valueTuple

            # OUT: 'INSERT IGNORE INTO `testing_table`
            # (a_newKey,and_another,dateCreated,uniqueKey2,uniquekey1) VALUES
            # (%s, %s, %s, %s, %s)', ('cool', 'super cool',
            # '2016-06-21T12:08:59', 'burgers', 'cheese')

        You can also return a list of single insert statements using ``batchInserts = False``. Using ``replace = True`` will also add instructions about how to replace duplicate entries in the database table if found:

        .. code-block:: python

            inserts = convert_dictionary_to_mysql_table(
                dbConn=dbConn,
                log=log,
                dictionary=dictionary,
                dbTableName="testing_table",
                uniqueKeyList=["uniquekey1", "uniqueKey2"],
                dateModified=False,
                returnInsertOnly=True,
                replace=True,
                batchInserts=False
            )

            print inserts

            # OUT: INSERT INTO `testing_table` (a_newKey,and_another,dateCreated,uniqueKey2,uniquekey1)
            # VALUES ("cool" ,"super cool" ,"2016-09-14T13:12:08" ,"burgers" ,"cheese")
            # ON DUPLICATE KEY UPDATE  a_newKey="cool", and_another="super
            # cool", dateCreated="2016-09-14T13:12:08", uniqueKey2="burgers",
            # uniquekey1="cheese"
    """

    log.debug('starting the ``convert_dictionary_to_mysql_table`` function')

    if not reDatetime:
        reDatetime = re.compile('^[0-9]{4}-[0-9]{2}-[0-9]{2}T')

    if not replace:
        insertVerb = "INSERT"
    else:
        insertVerb = "INSERT IGNORE"

    if returnInsertOnly == False:
        # TEST THE ARGUMENTS
        if str(type(dbConn).__name__) != "Connection":
            message = 'Please use a valid MySQL DB connection.'
            log.critical(message)
            raise TypeError(message)

        if not isinstance(dictionary, dict):
            message = 'Please make sure "dictionary" argument is a dict type.'
            log.critical(message)
            raise TypeError(message)

        if not isinstance(uniqueKeyList, list):
            message = 'Please make sure "uniqueKeyList" is a list'
            log.critical(message)
            raise TypeError(message)

        for i in uniqueKeyList:
            if i not in dictionary.keys():
                message = 'Please make sure values in "uniqueKeyList" are present in the "dictionary" you are trying to convert'
                log.critical(message)
                raise ValueError(message)

        for k, v in dictionary.iteritems():
            # log.debug('k: %s, v: %s' % (k, v,))
            if isinstance(v, list) and len(v) != 2:
                message = 'Please make sure the list values in "dictionary" are 2 items in length'
                log.critical("%s: in %s we have a %s (%s)" %
                             (message, k, v, type(v)))
                raise ValueError(message)
            if isinstance(v, list):
                if not (isinstance(v[0], str) or isinstance(v[0], int) or isinstance(v[0], bool) or isinstance(v[0], float) or isinstance(v[0], long) or isinstance(v[0], datetime.date) or v[0] == None):
                    message = 'Please make sure values in "dictionary" are of an appropriate value to add to the database, must be str, float, int or bool'
                    log.critical("%s: in %s we have a %s (%s)" %
                                 (message, k, v, type(v)))
                    raise ValueError(message)
            else:
                if not (isinstance(v, str) or isinstance(v, int) or isinstance(v, bool) or isinstance(v, float) or isinstance(v, long) or isinstance(v, unicode) or isinstance(v, datetime.date) or v == None):
                    this = type(v)
                    message = 'Please make sure values in "dictionary" are of an appropriate value to add to the database, must be str, float, int or bool : %(k)s is a %(this)s' % locals(
                    )
                    log.critical("%s: in %s we have a %s (%s)" %
                                 (message, k, v, type(v)))
                    raise ValueError(message)

        if not isinstance(createHelperTables, bool):
            message = 'Please make sure "createHelperTables" is a True or False'
            log.critical(message)
            raise TypeError(message)

        # TEST IF TABLE EXISTS
        if not skipChecks:
            tableExists = table_exists.table_exists(
                dbConn=dbConn,
                log=log,
                dbTableName=dbTableName
            )
        else:
            tableExists = False

        # CREATE THE TABLE IF IT DOES NOT EXIST
        if tableExists is False:
            sqlQuery = """
                CREATE TABLE IF NOT EXISTS `%(dbTableName)s`
                (`primaryId` bigint(20) NOT NULL AUTO_INCREMENT COMMENT 'An internal counter',
                `dateCreated` DATETIME NULL DEFAULT CURRENT_TIMESTAMP,
                `dateLastModified` DATETIME NULL DEFAULT CURRENT_TIMESTAMP,
                `updated` tinyint(4) DEFAULT '0',
                PRIMARY KEY (`primaryId`))
                ENGINE=MyISAM AUTO_INCREMENT=0 DEFAULT CHARSET=latin1;
            """ % locals()
            writequery(
                log=log,
                sqlQuery=sqlQuery,
                dbConn=dbConn,

            )

    qCreateColumn = ''
    formattedKey = ''
    formattedKeyList = []
    myValues = []

    # ADD EXTRA COLUMNS TO THE DICTIONARY todo: do I need this?
    if dateModified:
        dictionary['dateLastModified'] = [
            str(times.get_now_sql_datetime()), "date row was modified"]
        if replace == False:
            dictionary['updated'] = [0, "this row has been updated"]
        else:
            dictionary['updated'] = [1, "this row has been updated"]

    # ITERATE THROUGH THE DICTIONARY AND GENERATE THE TABLE COLUMN WITH THE
    # NAME OF THE KEY, IF IT DOES NOT EXIST
    count = len(dictionary)
    i = 1
    for (key, value) in dictionary.items():
        if (isinstance(value, list) and value[0] is None):
            del dictionary[key]
    # SORT THE DICTIONARY BY KEY
    odictionary = c.OrderedDict(sorted(dictionary.items()))
    for (key, value) in odictionary.iteritems():

        formattedKey = key.replace(" ", "_").replace("-", "_")
        # DEC A KEYWORD IN MYSQL - NEED TO CHANGE BEFORE INGEST
        if formattedKey == "dec":
            formattedKey = "decl"
        if formattedKey == "DEC":
            formattedKey = "DECL"

        formattedKeyList.extend([formattedKey])
        if len(key) > 0:
            # CONVERT LIST AND FEEDPARSER VALUES TO YAML (SO I CAN PASS IT AS A
            # STRING TO MYSQL)
            if isinstance(value, list) and (isinstance(value[0], list)):
                value[0] = yaml.dump(value[0])
                value[0] = str(value[0])
            # REMOVE CHARACTERS THAT COLLIDE WITH MYSQL
            # if type(value[0]) == str or type(value[0]) == unicode:
            #     value[0] = value[0].replace('"', """'""")
            # JOIN THE VALUES TOGETHER IN A LIST - EASIER TO GENERATE THE MYSQL
            # COMMAND LATER
            if isinstance(value, str):
                value = value.replace('\\', '\\\\')
                value = value.replace('"', '\\"')
                try:
                    udata = value.decode("utf-8", "ignore")
                    value = udata.encode("ascii", "ignore")
                except:
                    log.error('could not decode value %(value)s' % locals())

                # log.debug('udata: %(udata)s' % locals())

            if isinstance(value, unicode):
                value = value.replace('"', '\\"')
                value = value.encode("ascii", "ignore")

            if isinstance(value, list) and isinstance(value[0], unicode):
                myValues.extend(['%s' % value[0].strip()])
            elif isinstance(value, list):
                myValues.extend(['%s' % (value[0], )])
            else:
                myValues.extend(['%s' % (value, )])

            if returnInsertOnly == False:
                # CHECK IF COLUMN EXISTS YET
                colExists = \
                    "SELECT * FROM information_schema.COLUMNS WHERE TABLE_SCHEMA=DATABASE() AND COLUMN_NAME='" + \
                    formattedKey + "'AND TABLE_NAME='" + dbTableName + """'"""
                try:
                    # log.debug('checking if the column '+formattedKey+' exists
                    # in the '+dbTableName+' table')

                    rows = readquery(
                        log=log,
                        sqlQuery=colExists,
                        dbConn=dbConn,
                    )
                except Exception as e:
                    log.error('something went wrong: ' + str(e) + '\n')

                # IF COLUMN DOESN'T EXIT - GENERATE IT
                if len(rows) == 0:
                    qCreateColumn = """ALTER TABLE `%s` ADD `%s""" % (
                        dbTableName, formattedKey)
                    if not isinstance(value, list):
                        value = [value]
                    if reDatetime.search(str(value[0])):
                        # log.debug('Ok - a datetime string was found')
                        qCreateColumn += '` datetime DEFAULT NULL'
                    elif formattedKey == 'updated_parsed' or formattedKey == 'published_parsed' or formattedKey \
                            == 'feedName' or formattedKey == 'title':
                        qCreateColumn += '` varchar(100) DEFAULT NULL'
                    elif (isinstance(value[0], str) or isinstance(value[0], unicode)) and len(value[0]) < 30:
                        qCreateColumn += '` varchar(100) DEFAULT NULL'
                    elif (isinstance(value[0], str) or isinstance(value[0], unicode)) and len(value[0]) >= 30 and len(value[0]) < 80:
                        qCreateColumn += '` varchar(100) DEFAULT NULL'
                    elif isinstance(value[0], str) or isinstance(value[0], unicode):
                        columnLength = 450 + len(value[0]) * 2
                        qCreateColumn += '` varchar(' + str(
                            columnLength) + ') DEFAULT NULL'
                    elif isinstance(value[0], int) and abs(value[0]) <= 9:
                        qCreateColumn += '` tinyint DEFAULT NULL'
                    elif isinstance(value[0], int):
                        qCreateColumn += '` int DEFAULT NULL'
                    elif isinstance(value[0], float) or isinstance(value[0], long):
                        qCreateColumn += '` double DEFAULT NULL'
                    elif isinstance(value[0], bool):
                        qCreateColumn += '` tinyint DEFAULT NULL'
                    elif isinstance(value[0], list):
                        qCreateColumn += '` varchar(1024) DEFAULT NULL'
                    else:
                        # log.debug('Do not know what format to add this key in
                        # MySQL - removing from dictionary: %s, %s'
                                 # % (key, type(value[0])))
                        formattedKeyList.pop()
                        myValues.pop()
                        qCreateColumn = None
                    if qCreateColumn:
                        # ADD COMMENT TO GIVE THE ORGINAL KEYWORD IF formatted FOR
                        # MYSQL
                        if key != formattedKey:
                            qCreateColumn += " COMMENT 'original keyword: " + \
                                key + """'"""
                        # CREATE THE COLUMN IF IT DOES NOT EXIST
                        try:
                            log.info('creating the ' +
                                     formattedKey + ' column in the ' + dbTableName + ' table')
                            writequery(
                                log=log,
                                sqlQuery=qCreateColumn,
                                dbConn=dbConn
                            )

                        except Exception as e:
                            # log.debug('qCreateColumn: %s' % (qCreateColumn,
                            # ))
                            log.error('could not create the ' + formattedKey + ' column in the ' + dbTableName
                                      + ' table -- ' + str(e) + '\n')

    if returnInsertOnly == False:
        # GENERATE THE INDEX NAME - THEN CREATE INDEX IF IT DOES NOT YET EXIST
        if len(uniqueKeyList):
            for i in range(len(uniqueKeyList)):
                uniqueKeyList[i] = uniqueKeyList[
                    i].replace(" ", "_").replace("-", "_")
                if uniqueKeyList[i] == "dec":
                    uniqueKeyList[i] = "decl"
                if uniqueKeyList[i] == "DEC":
                    uniqueKeyList[i] = "DECL"

            indexName = uniqueKeyList[0].replace(" ", "_").replace("-", "_")
            for i in range(len(uniqueKeyList) - 1):
                indexName += '_' + uniqueKeyList[i + 1]

            indexName = indexName.lower().replace("  ", " ").replace(" ", "_")

            sqlQuery = u"""SELECT COUNT(*) FROM INFORMATION_SCHEMA.STATISTICS WHERE TABLE_SCHEMA = DATABASE() AND TABLE_NAME = '""" + \
                dbTableName + """' AND INDEX_NAME = '""" + indexName + """'"""
            rows = readquery(
                log=log,
                sqlQuery=sqlQuery,
                dbConn=dbConn,
                quiet=False
            )

            exists = rows[0]['COUNT(*)']
            # log.debug('uniqueKeyList: %s' % (uniqueKeyList,))
            if exists == 0:
                if isinstance(uniqueKeyList, list):
                    uniqueKeyList = ','.join(uniqueKeyList)

                addUniqueKey = 'ALTER TABLE `' + dbTableName + \
                    '` ADD unique ' + indexName + \
                    """ (""" + uniqueKeyList + ')'
                # log.debug('HERE IS THE COMMAND:'+addUniqueKey)
                writequery(
                    log=log,
                    sqlQuery=addUniqueKey,
                    dbConn=dbConn
                )

    if returnInsertOnly == True and batchInserts == True:
        myKeys = '`,`'.join(formattedKeyList)
        valueString = ("%s, " * len(myValues))[:-2]
        insertCommand = insertVerb + """ INTO `""" + dbTableName + \
            """` (`""" + myKeys + """`, dateCreated) VALUES (""" + \
            valueString + """, NOW())"""
        mv = []
        mv[:] = [None if m == "None" else m for m in myValues]
        valueTuple = tuple(mv)

        dup = ""
        if replace:
            dup = " ON DUPLICATE KEY UPDATE "
            for k, v in zip(formattedKeyList, mv):
                dup = """%(dup)s %(k)s=values(%(k)s),""" % locals()

        insertCommand = insertCommand + dup

        insertCommand = insertCommand.replace('\\""', '\\" "')
        insertCommand = insertCommand.replace('""', "null")
        insertCommand = insertCommand.replace('!!python/unicode:', '')
        insertCommand = insertCommand.replace('!!python/unicode', '')
        insertCommand = insertCommand.replace('"None"', 'null')

        if not dateCreated:
            insertCommand = insertCommand.replace(
                ", dateCreated)", ")").replace(", NOW())", ")")

        return insertCommand, valueTuple

    # GENERATE THE INSERT COMMAND - IGNORE DUPLICATE ENTRIES
    myKeys = '`,`'.join(formattedKeyList)
    myValues = '" ,"'.join(myValues)
    # log.debug(myValues+" ------ PRESTRIP")
    # REMOVE SOME CONVERSION NOISE
    myValues = myValues.replace('time.struct_time', '')
    myValues = myValues.replace(
        '- !!python/object/new:feedparser.FeedParserDict', '')
    myValues = myValues.replace(
        '!!python/object/new:feedparser.FeedParserDict', '')
    myValues = myValues.replace('dictitems:', '')
    myValues = myValues.replace('dictitems', '')
    myValues = myValues.replace('!!python/unicode:', '')
    myValues = myValues.replace('!!python/unicode', '')
    myValues = myValues.replace('"None"', 'null')
    # myValues = myValues.replace('"None', 'null')

    if myValues[-4:] != 'null':
        myValues += '"'

    dup = ""
    if replace:
        dupValues = ('"' + myValues).split(" ,")
        dupKeys = formattedKeyList
        dup = dup + " ON DUPLICATE KEY UPDATE "
        for k, v in zip(dupKeys, dupValues):
            dup = """%(dup)s `%(k)s`=%(v)s,""" % locals()

        if dateModified:
            dup = """%(dup)s updated=IF(""" % locals()
            for k, v in zip(dupKeys, dupValues):
                if v == "null":
                    dup = """%(dup)s `%(k)s` is %(v)s AND """ % locals()
                else:
                    dup = """%(dup)s `%(k)s`=%(v)s AND """ % locals()
            dup = dup[:-5] + ", 0, 1), dateLastModified=IF("
            for k, v in zip(dupKeys, dupValues):
                if v == "null":
                    dup = """%(dup)s `%(k)s` is %(v)s AND """ % locals()
                else:
                    dup = """%(dup)s `%(k)s`=%(v)s AND """ % locals()
            dup = dup[:-5] + ", dateLastModified, NOW())"
        else:
            dup = dup[:-1]

    # log.debug(myValues+" ------ POSTSTRIP")
    addValue = insertVerb + """ INTO `""" + dbTableName + \
        """` (`""" + myKeys + """`, dateCreated) VALUES (\"""" + \
        myValues + """, NOW()) %(dup)s """ % locals()

    if not dateCreated:
        addValue = addValue.replace(
            ", dateCreated)", ")").replace(", NOW())", ")", 1)

    addValue = addValue.replace('\\""', '\\" "')
    addValue = addValue.replace('""', "null")
    addValue = addValue.replace('!!python/unicode:', '')
    addValue = addValue.replace('!!python/unicode', '')
    addValue = addValue.replace('"None"', 'null')
    # log.debug(addValue)

    if returnInsertOnly == True:
        return addValue

    message = ""
    try:
        # log.debug('adding new data to the %s table; query: %s' %
        # (dbTableName, addValue))"
        writequery(
            log=log,
            sqlQuery=addValue,
            dbConn=dbConn
        )

    except Exception as e:
        log.error("could not add new data added to the table '" +
                  dbTableName + "' : " + str(e) + '\n')

    log.debug('completed the ``convert_dictionary_to_mysql_table`` function')
    return None, None
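# A small standalone sketch of the column-name normalisation applied above: spaces and
# dashes in dictionary keys become underscores, and the MySQL reserved word `dec`
# becomes `decl` (the keys here are made up).
for key in ["a newKey", "host-galaxy", "dec"]:
    formattedKey = key.replace(" ", "_").replace("-", "_")
    if formattedKey == "dec":
        formattedKey = "decl"
    print("%s -> %s" % (key, formattedKey))
# a newKey -> a_newKey
# host-galaxy -> host_galaxy
# dec -> decl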
Example #39
0
def convert_dictionary_to_mysql_table(
        log,
        dictionary,
        dbTableName,
        uniqueKeyList=[],
        dbConn=False,
        createHelperTables=False,
        dateModified=False,
        returnInsertOnly=False,
        replace=False,
        batchInserts=True,
        reDatetime=False,
        skipChecks=False,
        dateCreated=True):
    """convert dictionary to mysql table

    **Key Arguments:**
        - ``log`` -- logger
        - ``dictionary`` -- python dictionary
        - ``dbConn`` -- the db connection
        - ``dbTableName`` -- name of the table you wish to add the data to (or create if it does not exist)
        - ``uniqueKeyList`` -- a list of column names that need to be combined to create the primary key
        - ``createHelperTables`` -- create some helper tables with the main table, detailing original keywords etc
        - ``returnInsertOnly`` -- returns only the insert command (does not execute it)
        - ``dateModified`` -- add a modification date and updated flag to the mysql table
        - ``replace`` -- use replace instead of mysql insert statements (useful when updates are required)
        - ``batchInserts`` -- if returning insert statements return separate insert commands and value tuples
        - ``reDatetime`` -- compiled regular expression matching datetime (passing this in cuts down on execution time as it doesn't have to be recompiled every time during multiple iterations of ``convert_dictionary_to_mysql_table``)
        - ``skipChecks`` -- skip reliability checks. Less robust but a little faster.
        - ``dateCreated`` -- add a timestamp for dateCreated?

    **Return:**
        - ``returnInsertOnly`` -- the insert statement if requested

    **Usage:**

        To add a python dictionary to a database table, creating the table and/or columns if they don't yet exist:

        .. code-block:: python

            from fundamentals.mysql import convert_dictionary_to_mysql_table
            dictionary = {"a newKey": "cool", "and another": "super cool",
                      "uniquekey1": "cheese", "uniqueKey2": "burgers"}

            convert_dictionary_to_mysql_table(
                dbConn=dbConn,
                log=log,
                dictionary=dictionary,
                dbTableName="testing_table",
                uniqueKeyList=["uniquekey1", "uniqueKey2"],
                dateModified=False,
                returnInsertOnly=False,
                replace=True
            )

        Or just return the insert statement with a list of value tuples, i.e. do not execute the command on the database:

        .. code-block:: python

            insertCommand, valueTuple = convert_dictionary_to_mysql_table(
                dbConn=dbConn,
                log=log,
                dictionary=dictionary,
                dbTableName="testing_table",
                uniqueKeyList=["uniquekey1", "uniqueKey2"],
                dateModified=False,
                returnInsertOnly=True,
                replace=False,
                batchInserts=True
            )

            print(insertCommand, valueTuple)

            # OUT: 'INSERT IGNORE INTO `testing_table`
            # (a_newKey,and_another,dateCreated,uniqueKey2,uniquekey1) VALUES
            # (%s, %s, %s, %s, %s)', ('cool', 'super cool',
            # '2016-06-21T12:08:59', 'burgers', 'cheese')

        You can also return a list of single insert statements using ``batchInserts = False``. Using ``replace = True`` will also add instructions about how to replace duplicate entries in the database table if found:

        .. code-block:: python

            inserts = convert_dictionary_to_mysql_table(
                dbConn=dbConn,
                log=log,
                dictionary=dictionary,
                dbTableName="testing_table",
                uniqueKeyList=["uniquekey1", "uniqueKey2"],
                dateModified=False,
                returnInsertOnly=True,
                replace=True,
                batchInserts=False
            )

            print(inserts)

            # OUT: INSERT INTO `testing_table` (a_newKey,and_another,dateCreated,uniqueKey2,uniquekey1)
            # VALUES ("cool" ,"super cool" ,"2016-09-14T13:12:08" ,"burgers" ,"cheese")
            # ON DUPLICATE KEY UPDATE  a_newKey="cool", and_another="super
            # cool", dateCreated="2016-09-14T13:12:08", uniqueKey2="burgers",
            # uniquekey1="cheese"
    """

    log.debug('starting the ``convert_dictionary_to_mysql_table`` function')

    if not reDatetime:
        reDatetime = re.compile('^[0-9]{4}-[0-9]{2}-[0-9]{2}T')

    if not replace:
        insertVerb = "INSERT"
    else:
        insertVerb = "INSERT IGNORE"

    if returnInsertOnly == False:
        # TEST THE ARGUMENTS
        if str(type(dbConn).__name__) != "Connection":
            message = 'Please use a valid MySQL DB connection.'
            log.critical(message)
            raise TypeError(message)

        if not isinstance(dictionary, dict):
            message = 'Please make sure "dictionary" argument is a dict type.'
            log.critical(message)
            raise TypeError(message)

        if not isinstance(uniqueKeyList, list):
            message = 'Please make sure "uniqueKeyList" is a list'
            log.critical(message)
            raise TypeError(message)

        for i in uniqueKeyList:
            if i not in list(dictionary.keys()):
                message = 'Please make sure values in "uniqueKeyList" are present in the "dictionary" you are trying to convert'
                log.critical(message)
                raise ValueError(message)

        for k, v in list(dictionary.items()):
            # log.debug('k: %s, v: %s' % (k, v,))
            if isinstance(v, list) and len(v) != 2:
                message = 'Please make sure the list values in "dictionary" are 2 items in length'
                log.critical("%s: in %s we have a %s (%s)" %
                             (message, k, v, type(v)))
                raise ValueError(message)
            if isinstance(v, list):
                if not (isinstance(v[0], six.string_types) or isinstance(v[0], int) or isinstance(v[0], bool) or isinstance(v[0], float) or isinstance(v[0], int) or isinstance(v[0], datetime.date) or v[0] == None):
                    message = 'Please make sure values in "dictionary" are of an appropriate value to add to the database, must be str, float, int or bool'
                    log.critical("%s: in %s we have a %s (%s)" %
                                 (message, k, v, type(v)))
                    raise ValueError(message)
            else:
                if not (isinstance(v, six.string_types) or isinstance(v, int) or isinstance(v, bool) or isinstance(v, float) or isinstance(v, datetime.date) or v == None or "int" in str(type(v))):
                    this = type(v)
                    message = 'Please make sure values in "dictionary" are of an appropriate value to add to the database, must be str, float, int or bool : %(k)s is a %(this)s' % locals(
                    )
                    log.critical("%s: in %s we have a %s (%s)" %
                                 (message, k, v, type(v)))
                    raise ValueError(message)

        if not isinstance(createHelperTables, bool):
            message = 'Please make sure "createHelperTables" is a True or False'
            log.critical(message)
            raise TypeError(message)

        # TEST IF TABLE EXISTS
        if not skipChecks:
            tableExists = table_exists.table_exists(
                dbConn=dbConn,
                log=log,
                dbTableName=dbTableName
            )
        else:
            tableExists = False

        # CREATE THE TABLE IF IT DOES NOT EXIST
        if tableExists is False:
            sqlQuery = """
                CREATE TABLE IF NOT EXISTS `%(dbTableName)s`
                (`primaryId` bigint(20) NOT NULL AUTO_INCREMENT COMMENT 'An internal counter',
                `dateCreated` DATETIME NULL DEFAULT CURRENT_TIMESTAMP,
                `dateLastModified` DATETIME NULL DEFAULT CURRENT_TIMESTAMP,
                `updated` tinyint(4) DEFAULT '0',
                PRIMARY KEY (`primaryId`))
                ENGINE=MyISAM AUTO_INCREMENT=0 DEFAULT CHARSET=latin1;
            """ % locals()
            writequery(
                log=log,
                sqlQuery=sqlQuery,
                dbConn=dbConn,

            )

    qCreateColumn = ''
    formattedKey = ''
    formattedKeyList = []
    myValues = []

    # ADD EXTRA COLUMNS TO THE DICTIONARY todo: do I need this?
    if dateModified:
        dictionary['dateLastModified'] = [
            str(times.get_now_sql_datetime()), "date row was modified"]
        if replace == False:
            dictionary['updated'] = [0, "this row has been updated"]
        else:
            dictionary['updated'] = [1, "this row has been updated"]

    # ITERATE THROUGH THE DICTIONARY AND GENERATE THE TABLE COLUMN WITH THE
    # NAME OF THE KEY, IF IT DOES NOT EXIST
    count = len(dictionary)
    i = 1
    for (key, value) in list(dictionary.items()):
        if (isinstance(value, list) and value[0] is None):
            del dictionary[key]
    # SORT THE DICTIONARY BY KEY
    odictionary = c.OrderedDict(sorted(dictionary.items()))
    for (key, value) in list(odictionary.items()):

        formattedKey = key.replace(" ", "_").replace("-", "_")
        # DEC A KEYWORD IN MYSQL - NEED TO CHANGE BEFORE INGEST
        if formattedKey == u"dec":
            formattedKey = u"decl"
        if formattedKey == u"DEC":
            formattedKey = u"DECL"

        formattedKeyList.extend([formattedKey])
        if len(key) > 0:
            # CONVERT LIST AND FEEDPARSER VALUES TO YAML (SO I CAN PASS IT AS A
            # STRING TO MYSQL)
            if isinstance(value, list) and (isinstance(value[0], list)):
                value[0] = yaml.dump(value[0])
                value[0] = str(value[0])
            # REMOVE CHARACTERS THAT COLLIDE WITH MYSQL
            # JOIN THE VALUES TOGETHER IN A LIST - EASIER TO GENERATE THE MYSQL
            # COMMAND LATER
            if isinstance(value, str):
                value = value.replace('\\', '\\\\')
                value = value.replace('"', '\\"')
                try:
                    udata = value.decode("utf-8", "ignore")
                    value = udata.encode("ascii", "ignore")
                except:
                    pass

                # log.debug('udata: %(udata)s' % locals())

            if isinstance(value, list) and isinstance(value[0], str):
                myValues.extend(['%s' % value[0].strip()])
            elif isinstance(value, list):
                myValues.extend(['%s' % (value[0], )])
            else:
                myValues.extend(['%s' % (value, )])

            if returnInsertOnly == False:
                # CHECK IF COLUMN EXISTS YET
                colExists = \
                    "SELECT * FROM information_schema.COLUMNS WHERE TABLE_SCHEMA=DATABASE() AND COLUMN_NAME='" + \
                    formattedKey + "'AND TABLE_NAME='" + dbTableName + """'"""
                try:
                    # log.debug('checking if the column '+formattedKey+' exists
                    # in the '+dbTableName+' table')

                    rows = readquery(
                        log=log,
                        sqlQuery=colExists,
                        dbConn=dbConn,
                    )
                except Exception as e:
                    log.error('something went wrong: ' + str(e) + '\n')

                # IF COLUMN DOESN'T EXIT - GENERATE IT
                if len(rows) == 0:
                    qCreateColumn = """ALTER TABLE `%s` ADD `%s""" % (
                        dbTableName, formattedKey)
                    if not isinstance(value, list):
                        value = [value]
                    if reDatetime.search(str(value[0])):
                        # log.debug('Ok - a datetime string was found')
                        qCreateColumn += '` datetime DEFAULT NULL'
                    elif formattedKey == 'updated_parsed' or formattedKey == 'published_parsed' or formattedKey \
                            == 'feedName' or formattedKey == 'title':
                        qCreateColumn += '` varchar(100) DEFAULT NULL'
                    elif isinstance(value[0], ("".__class__, u"".__class__)) and len(value[0]) < 30:
                        qCreateColumn += '` varchar(100) DEFAULT NULL'
                    elif isinstance(value[0], ("".__class__, u"".__class__)) and len(value[0]) >= 30 and len(value[0]) < 80:
                        qCreateColumn += '` varchar(100) DEFAULT NULL'
                    elif isinstance(value[0], ("".__class__, u"".__class__)):
                        columnLength = 450 + len(value[0]) * 2
                        qCreateColumn += '` varchar(' + str(
                            columnLength) + ') DEFAULT NULL'
                    elif isinstance(value[0], int) and abs(value[0]) <= 9:
                        qCreateColumn += '` tinyint DEFAULT NULL'
                    elif isinstance(value[0], int):
                        qCreateColumn += '` int DEFAULT NULL'
                    elif isinstance(value[0], float) or isinstance(value[0], int):
                        qCreateColumn += '` double DEFAULT NULL'
                    elif isinstance(value[0], bool):
                        qCreateColumn += '` tinyint DEFAULT NULL'
                    elif isinstance(value[0], list):
                        qCreateColumn += '` varchar(1024) DEFAULT NULL'
                    else:
                        # log.debug('Do not know what format to add this key in
                        # MySQL - removing from dictionary: %s, %s'
                        # % (key, type(value[0])))
                        formattedKeyList.pop()
                        myValues.pop()
                        qCreateColumn = None
                    if qCreateColumn:
                        # ADD COMMENT TO GIVE THE ORGINAL KEYWORD IF formatted FOR
                        # MYSQL
                        if key != formattedKey:
                            qCreateColumn += " COMMENT 'original keyword: " + \
                                key + """'"""
                        # CREATE THE COLUMN IF IT DOES NOT EXIST
                        try:
                            log.info('creating the ' +
                                     formattedKey + ' column in the ' + dbTableName + ' table')
                            writequery(
                                log=log,
                                sqlQuery=qCreateColumn,
                                dbConn=dbConn
                            )

                        except Exception as e:
                            # log.debug('qCreateColumn: %s' % (qCreateColumn,
                            # ))
                            log.error('could not create the ' + formattedKey + ' column in the ' + dbTableName
                                      + ' table -- ' + str(e) + '\n')

    if returnInsertOnly == False:
        # GENERATE THE INDEX NAME - THEN CREATE INDEX IF IT DOES NOT YET EXIST
        if len(uniqueKeyList):
            for i in range(len(uniqueKeyList)):
                uniqueKeyList[i] = uniqueKeyList[
                    i].replace(" ", "_").replace("-", "_")
                if uniqueKeyList[i] == u"dec":
                    uniqueKeyList[i] = u"decl"
                if uniqueKeyList[i] == u"DEC":
                    uniqueKeyList[i] = u"DECL"

            indexName = uniqueKeyList[0].replace(" ", "_").replace("-", "_")
            for i in range(len(uniqueKeyList) - 1):
                indexName += '_' + uniqueKeyList[i + 1]

            indexName = indexName.lower().replace("  ", " ").replace(" ", "_")

            sqlQuery = u"""SELECT COUNT(*) FROM INFORMATION_SCHEMA.STATISTICS WHERE TABLE_SCHEMA = DATABASE() AND TABLE_NAME = '""" + \
                dbTableName + """' AND INDEX_NAME = '""" + indexName + """'"""
            rows = readquery(
                log=log,
                sqlQuery=sqlQuery,
                dbConn=dbConn,
                quiet=False
            )

            exists = rows[0]['COUNT(*)']
            # log.debug('uniqueKeyList: %s' % (uniqueKeyList,))
            if exists == 0:
                if isinstance(uniqueKeyList, list):
                    uniqueKeyList = ','.join(uniqueKeyList)

                addUniqueKey = 'ALTER TABLE `' + dbTableName + \
                    '` ADD unique ' + indexName + \
                    """ (""" + uniqueKeyList + ')'
                # log.debug('HERE IS THE COMMAND:'+addUniqueKey)
                writequery(
                    log=log,
                    sqlQuery=addUniqueKey,
                    dbConn=dbConn
                )

    if returnInsertOnly == True and batchInserts == True:
        myKeys = '`,`'.join(formattedKeyList)
        valueString = ("%s, " * len(myValues))[:-2]
        insertCommand = insertVerb + """ INTO `""" + dbTableName + \
            """` (`""" + myKeys + """`, dateCreated) VALUES (""" + \
            valueString + """, NOW())"""
        mv = []
        mv[:] = [None if m == u"None" else m for m in myValues]
        valueTuple = tuple(mv)

        dup = ""
        if replace:
            dup = " ON DUPLICATE KEY UPDATE "
            for k, v in zip(formattedKeyList, mv):
                dup = """%(dup)s %(k)s=values(%(k)s),""" % locals()

        insertCommand = insertCommand + dup

        insertCommand = insertCommand.replace('\\""', '\\" "')
        insertCommand = insertCommand.replace('""', "null")
        insertCommand = insertCommand.replace('!!python/unicode:', '')
        insertCommand = insertCommand.replace('!!python/unicode', '')
        insertCommand = insertCommand.replace('"None"', 'null')
        insertCommand = insertCommand.replace('"null"', 'null')

        if not dateCreated:
            insertCommand = insertCommand.replace(
                ", dateCreated)", ")").replace(", NOW())", ")")

        return insertCommand, valueTuple

    # GENERATE THE INSERT COMMAND - IGNORE DUPLICATE ENTRIES
    myKeys = '`,`'.join(formattedKeyList)
    myValues = '" ,"'.join(myValues)
    # log.debug(myValues+" ------ PRESTRIP")
    # REMOVE SOME CONVERSION NOISE
    myValues = myValues.replace('time.struct_time', '')
    myValues = myValues.replace(
        '- !!python/object/new:feedparser.FeedParserDict', '')
    myValues = myValues.replace(
        '!!python/object/new:feedparser.FeedParserDict', '')
    myValues = myValues.replace('dictitems:', '')
    myValues = myValues.replace('dictitems', '')
    myValues = myValues.replace('!!python/unicode:', '')
    myValues = myValues.replace('!!python/unicode', '')
    myValues = myValues.replace('"None"', 'null')
    myValues = myValues.replace('"null"', 'null')
    # myValues = myValues.replace('"None', 'null')

    if myValues[-4:] != 'null':
        myValues += '"'

    dup = ""
    if replace:
        dupValues = ('"' + myValues).split(" ,")
        dupKeys = formattedKeyList
        dup = dup + " ON DUPLICATE KEY UPDATE "
        for k, v in zip(dupKeys, dupValues):
            dup = """%(dup)s `%(k)s`=%(v)s,""" % locals()

        if dateModified:
            dup = """%(dup)s updated=IF(""" % locals()
            for k, v in zip(dupKeys, dupValues):
                if v == "null":
                    dup = """%(dup)s `%(k)s` is %(v)s AND """ % locals()
                else:
                    dup = """%(dup)s `%(k)s`=%(v)s AND """ % locals()
            dup = dup[:-5] + ", 0, 1), dateLastModified=IF("
            for k, v in zip(dupKeys, dupValues):
                if v == "null":
                    dup = """%(dup)s `%(k)s` is %(v)s AND """ % locals()
                else:
                    dup = """%(dup)s `%(k)s`=%(v)s AND """ % locals()
            dup = dup[:-5] + ", dateLastModified, NOW())"
        else:
            dup = dup[:-1]

    # log.debug(myValues+" ------ POSTSTRIP")
    addValue = insertVerb + """ INTO `""" + dbTableName + \
        """` (`""" + myKeys + """`, dateCreated) VALUES (\"""" + \
        myValues + """, NOW()) %(dup)s """ % locals()

    if not dateCreated:
        addValue = addValue.replace(
            ", dateCreated)", ")").replace(", NOW())", ")", 1)

    addValue = addValue.replace('\\""', '\\" "')
    addValue = addValue.replace('""', "null")
    addValue = addValue.replace('!!python/unicode:', '')
    addValue = addValue.replace('!!python/unicode', '')
    addValue = addValue.replace('"None"', 'null')
    addValue = addValue.replace('"null"', 'null')
    # log.debug(addValue)

    if returnInsertOnly == True:
        return addValue

    message = ""
    try:
        # log.debug('adding new data to the %s table; query: %s' %
        # (dbTableName, addValue))
        writequery(
            log=log,
            sqlQuery=addValue,
            dbConn=dbConn
        )

    except Exception as e:
        log.error("could not add new data added to the table '" +
                  dbTableName + "' : " + str(e) + '\n')

    log.debug('completed the ``convert_dictionary_to_mysql_table`` function')
    return None, None
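    # A hedged usage sketch for the function above with `returnInsertOnly=True`:
    # the returned parameterised insert command and value tuple can be handed
    # straight to `writequery` via its `manyValueList` argument (this is how
    # `insert_list_of_dictionaries_into_database_tables` further below drives
    # it). `dbConn`, `log` and the column names are illustrative assumptions,
    # and the target table is assumed to already exist:
    #
    #     insertCommand, valueTuple = convert_dictionary_to_mysql_table(
    #         dbConn=dbConn,
    #         log=log,
    #         dictionary={"col1": 1, "col2": "a value"},
    #         dbTableName="test_insert_many",
    #         uniqueKeyList=["col1"],
    #         dateModified=False,
    #         returnInsertOnly=True,
    #         replace=True)
    #     writequery(log=log, sqlQuery=insertCommand, dbConn=dbConn,
    #                Force=True, manyValueList=[valueTuple])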
    def convert_sqlite_to_mysql(
            self):
        """*copy the contents of the sqlite database into the mysql database*

        See class docstring for usage
        """
        from fundamentals.renderer import list_of_dictionaries
        from fundamentals.mysql import directory_script_runner
        self.log.debug('starting the ``convert_sqlite_to_mysql`` method')

        con = lite.connect(self.pathToSqlite)
        con.row_factory = lite.Row
        cur = con.cursor()

        # GET ALL TABLE NAMES
        cur.execute("SELECT name FROM sqlite_master WHERE type='table';")
        tables = cur.fetchall()

        createStatements = []
        inserts = []
        for table in tables:
            table = table['name']
            if table == "sqlite_sequence":
                continue

            # CREATE TABLE collection_books (folder_id, fingerprint, primary key(folder_id, fingerprint));
            # GENERATE THE MYSQL CREATE STATEMENTS FOR EACH TABLE
            cur.execute(
                "SELECT sql FROM sqlite_master WHERE name = '%(table)s';" % locals())
            createStatement = cur.fetchone()
            createStatement = createStatement[0].replace('"', '`') + ";"
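            # SQLITE ALLOWS UNTYPED COLUMNS; WHERE NO TYPE/DEFAULT WAS GIVEN,
            # ASSIGN A VARCHAR(150) DEFAULT NULL TYPE SO MYSQL WILL ACCEPT THE
            # CREATE STATEMENT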
            if "DEFAULT" not in createStatement:
                if "primary key(" in createStatement:
                    tmp = createStatement.split("primary key(")
                    tmp[0] = tmp[0].replace(
                        ",", " varchar(150) DEFAULT NULL,")
                    createStatement = ("primary key(").join(tmp)
                if "primary key," in createStatement:
                    tmp = createStatement.split("primary key,")
                    tmp[1] = tmp[1].replace(
                        ",", " varchar(150) DEFAULT NULL,")
                    tmp[1] = tmp[1].replace(
                        ");", " varchar(150) DEFAULT NULL);")
                    createStatement = ("primary key,").join(tmp)
            createStatement = createStatement.replace(
                "INTEGER PRIMARY KEY", "INTEGER AUTO_INCREMENT PRIMARY KEY")
            createStatement = createStatement.replace(
                "AUTOINCREMENT", "AUTO_INCREMENT")
            createStatement = createStatement.replace(
                "DEFAULT 't'", "DEFAULT '1'")
            createStatement = createStatement.replace(
                "DEFAULT 'f'", "DEFAULT '0'")
            createStatement = createStatement.replace(",'t'", ",'1'")
            createStatement = createStatement.replace(",'f'", ",'0'")
            if "CREATE TABLE `" in createStatement:
                createStatement = createStatement.replace(
                    "CREATE TABLE `", "CREATE TABLE IF NOT EXISTS `" + self.tablePrefix)
            else:
                createStatement = createStatement.replace(
                    "CREATE TABLE ", "CREATE TABLE IF NOT EXISTS " + self.tablePrefix)
            if ", primary key(" in createStatement:
                createStatement = createStatement.replace(", primary key(", """,
`dateCreated` datetime DEFAULT CURRENT_TIMESTAMP,
`dateLastModified` datetime DEFAULT CURRENT_TIMESTAMP,
`updated` tinyint(4) DEFAULT '0',
primary key(""")
            else:
                createStatement = createStatement.replace(");", """,
    `dateCreated` datetime DEFAULT CURRENT_TIMESTAMP,
    `dateLastModified` datetime DEFAULT CURRENT_TIMESTAMP,
    `updated` tinyint(4) DEFAULT '0');
                """)
            createStatement = createStatement.replace(
                " text primary key", " varchar(100) primary key")
            createStatement = createStatement.replace(
                "`EntryText` TEXT NOT NULL,", "`EntryText` TEXT,")
            createStatement = createStatement.replace(
                "`SelectionText` TEXT NOT NULL", "`SelectionText` TEXT")
            createStatement = createStatement.replace(
                "`Filename` INTEGER NOT NULL,", "`Filename` TEXT NOT NULL,")
            createStatement = createStatement.replace(
                "`SessionPartUUID` TEXT NOT NULL UNIQUE,", "`SessionPartUUID` VARCHAR(100) NOT NULL UNIQUE,")
            createStatement = createStatement.replace(
                "`Name` TEXT PRIMARY KEY NOT NULL", "`Name` VARCHAR(100) PRIMARY KEY NOT NULL")
            createStatement = createStatement.replace(
                " VARCHAR ", " VARCHAR(100) ")
            createStatement = createStatement.replace(
                " VARCHAR,", " VARCHAR(100),")

            # GRAB THE DATA TO ADD TO THE MYSQL DATABASE TABLES
            cur.execute(
                "SELECT * from '%(table)s';" % locals())
            rows = cur.fetchall()

            allRows = []
            for row in rows:
                allRows.append(dict(row))

            # RECURSIVELY CREATE MISSING DIRECTORIES
            if not os.path.exists("/tmp/headjack/"):
                os.makedirs("/tmp/headjack/")

            writequery(
                log=self.log,
                sqlQuery=createStatement,
                dbConn=self.dbConn,
            )

            from fundamentals.mysql import insert_list_of_dictionaries_into_database_tables
            # USE dbSettings TO ACTIVATE MULTIPROCESSING
            insert_list_of_dictionaries_into_database_tables(
                dbConn=self.dbConn,
                log=self.log,
                dictList=allRows,
                dbTableName=self.tablePrefix + table,
                uniqueKeyList=[],
                dateModified=True,
                dateCreated=True,
                batchSize=10000,
                replace=True,
                dbSettings=self.settings["database settings"]
            )

            # # DUMP THE DATA INTO A MYSQL DATABASE
            # dataSet = list_of_dictionaries(
            #     log=self.log,
            #     listOfDictionaries=allRows
            # )
            # originalList = dataSet.list
            # now = datetime.now()
            # now = now.strftime("%Y%m%dt%H%M%S%f.sql")
            # mysqlData = dataSet.mysql(
            # tableName=self.tablePrefix + table, filepath="/tmp/headjack/" +
            # now, createStatement=createStatement)

            # directory_script_runner(
            #     log=self.log,
            #     pathToScriptDirectory="/tmp/headjack/",
            #     databaseName=self.settings["database settings"]["db"],
            #     loginPath=self.settings["database settings"]["loginPath"],
            #     successRule="delete",
            #     failureRule="failed"
            # )

        con.close()

        self.log.debug('completed the ``convert_sqlite_to_mysql`` method')
        return None
Example #41
0
def generate_atlas_lightcurves(
        dbConn,
        log,
        settings):
    """generate atlas lightcurves

    **Key Arguments**

    - ``dbConn`` -- mysql database connection
    - ``log`` -- logger
    - ``settings`` -- settings for the marshall.
    

    **Return**

    - None
    

    **Usage**

    .. todo::

        add usage info
        create a sublime snippet for usage

    ```python
    usage code
    ```
    

    .. todo::

        - @review: when complete, clean generate_atlas_lightcurves function
        - @review: when complete add logging
        - @review: when complete, decide whether to abstract function to another module
    """
    log.debug('starting the ``generate_atlas_lightcurves`` function')

    # SELECT OUT THE SOURCES THAT NEED THEIR LCS UPDATED
    sqlQuery = u"""
        SELECT
            a.transientBucketId
        FROM
            (SELECT
                transientBucketId, dateCreated
            FROM
                transientBucket
            WHERE
                survey = 'ATLAS FP' and limitingMag = 0
            ORDER BY dateCreated DESC) a,
            pesstoObjects p
            where p.transientBucketId=a.transientBucketId
            and ((p.atlas_fp_lightcurve < a.dateCreated) or p.atlas_fp_lightcurve is null)
        GROUP BY a.transientBucketId;
    """ % locals()
    rows = readquery(
        log=log,
        sqlQuery=sqlQuery,
        dbConn=dbConn
    )

    total = len(rows)
    print("Generating ATLAS lightcurves for %(total)s sources" % locals())

    index = 1
    for row in rows:

        # SELECT OUT THE LIGHT CURVE DATA FOR A GIVEN ATLAS TRANSIENT
        transientBucketId = row["transientBucketId"]

        if index > 1:
            # Cursor up one line and clear line
            sys.stdout.write("\x1b[1A\x1b[2K")

        percent = (old_div(float(index), float(total))) * 100.
        print('%(index)s/%(total)s (%(percent)1.1f%% done): generating ATLAS LC for transientBucketId: %(transientBucketId)s' % locals())
        index += 1

        sqlQuery = u"""
            SELECT
                atlas_designation,
                mjd_obs,
                filter,
                marshall_mag as mag,
                marshall_mag_error as dm,
                fnu*1e27 as fnu,
                fnu_error*1e27 as fnu_error,
                snr,
                zp,
                marshall_limiting_mag as limiting_mag
            FROM
                fs_atlas_forced_phot
            WHERE
                (skyfit > 0) and
                atlas_designation in (SELECT distinct name
            FROM
                transientBucket
            WHERE
                survey = 'ATLAS FP'
                    AND transientBucketId = %(transientBucketId)s
                    AND dateDeleted IS NULL)
            and fnu is not null;
        """ % locals()
        epochs = readquery(
            log=log,
            sqlQuery=sqlQuery,
            dbConn=dbConn
        )

        # FIND THE CACHE DIR FOR THE SOURCE
        cacheDirectory = settings["downloads"][
            "transient cache directory"] + "/" + str(transientBucketId)

        # CREATE THE PLOT FOR THIS ONE ATLAS SOURCE
        create_lc(
            log=log,
            cacheDirectory=cacheDirectory,
            epochs=epochs
        )

        # UPDATE THE OBJECTS FLAG
        sqlQuery = """update pesstoObjects set atlas_fp_lightcurve = NOW() where transientBucketID = %(transientBucketId)s """ % locals()
        writequery(
            log=log,
            sqlQuery=sqlQuery,
            dbConn=dbConn
        )

    log.debug('completed the ``generate_atlas_lightcurves`` function')
    return None
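# The docstring above still flags its usage section as a todo; as a minimal,
# hedged sketch (assuming a live `dbConn`, a `log` object and the marshall
# `settings` dictionary are already configured) the function is simply called
# as:
#
#     generate_atlas_lightcurves(
#         dbConn=dbConn,
#         log=log,
#         settings=settings
#     )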
def add_htm_ids_to_mysql_database_table(
        raColName,
        declColName,
        tableName,
        dbConn,
        log,
        primaryIdColumnName="primaryId",
        cartesian=False,
        batchSize=25000,
        reindex=False):
    """*Given a database connection, a name of a table and the column names for RA and DEC, generates ID for one or more HTM level in the table*

    **Key Arguments:**
        - ``raColName`` -- ra in decimal degrees
        - ``declColName`` -- dec in decimal degrees
        - ``tableName`` -- name of table to add htmid info to
        - ``dbConn`` -- database hosting the above table
        - ``log`` -- logger
        - ``primaryIdColumnName`` -- the primary id for the table
        - ``cartesian`` -- add cartesian columns. Default *False*
        - ``batchSize`` -- the size of the batches of rows to add HTMIds to concurrently. Default *25000*
        - ``reindex`` -- reindex the entire table

    **Return:**
        - None

    **Usage:**

        .. code-block:: python 

            from HMpTy.mysql import add_htm_ids_to_mysql_database_table
            add_htm_ids_to_mysql_database_table(
                raColName="raDeg",
                declColName="decDeg",
                tableName="my_big_star_table",
                dbConn=dbConn,
                log=log,
                primaryIdColumnName="primaryId",
                reindex=False
            )
    """
    log.info('starting the ``add_htm_ids_to_mysql_database_table`` function')

    # TEST TABLE EXIST
    sqlQuery = """show tables"""
    rows = readquery(
        log=log,
        sqlQuery=sqlQuery,
        dbConn=dbConn
    )

    log.debug(
        """Checking the table %(tableName)s exists in the database""" % locals())
    tableList = []
    for row in rows:
        tableList.extend(row.values())
    if tableName not in tableList:
        message = "The %s table does not exist in the database" % (tableName,)
        log.critical(message)
        raise IOError(message)

    log.debug(
        """Checking the RA and DEC columns exist in the %(tableName)s table""" % locals())
    # TEST COLUMNS EXISTS
    cursor = dbConn.cursor(ms.cursors.DictCursor)
    sqlQuery = """SELECT * FROM %s LIMIT 1""" % (tableName,)
    cursor.execute(sqlQuery)
    rows = cursor.fetchall()
    desc = cursor.description
    existingColumns = []
    for i in range(len(desc)):
        existingColumns.append(desc[i][0])
    if (raColName not in existingColumns) or (declColName not in existingColumns):
        message = 'Please make sure you have the names of the RA and DEC columns correct'
        log.critical(message)
        raise IOError(message)

    if cartesian:
        # ACTION(S) ##
        htmCols = {
            'htm16ID': 'BIGINT(20)',
            'htm13ID': 'INT',
            'htm10ID': 'INT',
            'cx': 'DOUBLE',
            'cy': 'DOUBLE',
            'cz': 'DOUBLE'
        }
    else:
        htmCols = {
            'htm16ID': 'BIGINT(20)',
            'htm13ID': 'INT',
            'htm10ID': 'INT'
        }

    # CHECK IF THE HTM COLUMNS EXIST YET - IF NOT, CREATE THEM
    switch = 0
    for key in htmCols.keys():
        try:
            log.debug(
                'attempting to check and generate the HTMId columns for the %s db table' %
                (tableName, ))
            colExists = \
                """SELECT *
                    FROM information_schema.COLUMNS
                    WHERE TABLE_SCHEMA=DATABASE()
                    AND COLUMN_NAME='%s'
                    AND TABLE_NAME='%s'""" \
                % (key, tableName)
            colExists = readquery(
                log=log,
                sqlQuery=colExists,
                dbConn=dbConn
            )
            if not colExists:
                if switch == 0:
                    print "Adding the HTMCircle columns to %(tableName)s" % locals()
                    switch = 1
                sqlQuery = 'ALTER TABLE ' + tableName + ' ADD ' + \
                    key + ' ' + htmCols[key] + ' DEFAULT NULL'
                writequery(
                    log=log,
                    sqlQuery=sqlQuery,
                    dbConn=dbConn,
                )
        except Exception as e:
            log.critical('could not check and generate the HTMId columns for the %s db table - failed with this error: %s '
                         % (tableName, str(e)))
            raise e

    log.debug(
        """Counting the number of rows still requiring HTMID information""" % locals())
    if reindex:
        sqlQuery = u"""
            SELECT INDEX_NAME FROM INFORMATION_SCHEMA.STATISTICS
                WHERE table_schema=DATABASE() AND table_name='%(tableName)s' and COLUMN_NAME = "%(primaryIdColumnName)s";
        """ % locals()
        keyname = readquery(
            log=log,
            sqlQuery=sqlQuery,
            dbConn=dbConn
        )[0]["INDEX_NAME"]
        if keyname != "PRIMARY":
            log.error('To reindex the entire table the primaryID you submit must be unique. "%(primaryIdColumnName)s" is not unique in table "%(tableName)s"' % locals())
            return

        sqlQuery = """ALTER TABLE `%(tableName)s` disable keys""" % locals()
        writequery(
            log=log,
            sqlQuery=sqlQuery,
            dbConn=dbConn
        )

        sqlQuery = """SELECT count(*) as count from `%(tableName)s`""" % locals(
        )
    elif cartesian:
        # COUNT ROWS WHERE HTMIDs ARE NOT SET
        sqlQuery = """SELECT count(*) as count from `%(tableName)s` where htm10ID is NULL or cx is null""" % locals(
        )
    else:
        # COUNT ROWS WHERE HTMIDs ARE NOT SET
        sqlQuery = """SELECT count(*) as count from `%(tableName)s` where htm10ID is NULL""" % locals(
        )
    log.debug(
        """SQLQUERY:\n\n%(sqlQuery)s\n\n""" % locals())
    rowCount = readquery(
        log=log,
        sqlQuery=sqlQuery,
        dbConn=dbConn,
        quiet=False
    )
    totalCount = rowCount[0]["count"]

    # ADD HTMIDs IN BATCHES
    total = totalCount
    batches = int(total / batchSize)

    count = 0
    lastId = False
    # NOW GENERATE THE HTMLIds FOR THESE ROWS
    for i in range(batches + 1):
        if total == 0:
            continue
        count += batchSize
        if count > batchSize:
            # Cursor up one line and clear line
            sys.stdout.write("\x1b[1A\x1b[2K")
        if count > totalCount:
            count = totalCount

        start = time.time()

        log.debug(
            """Selecting the next %(batchSize)s rows requiring HTMID information in the %(tableName)s table""" % locals())
        if reindex:
            # SELECT THE ROWS WHERE THE HTMIds ARE NOT SET
            if lastId:
                sqlQuery = """SELECT `%s`, `%s`, `%s` from `%s` where `%s` > '%s' order by `%s` limit %s""" % (
                    primaryIdColumnName, raColName, declColName, tableName, primaryIdColumnName,  lastId, primaryIdColumnName, batchSize)
            else:
                sqlQuery = """SELECT `%s`, `%s`, `%s` from `%s` order by `%s` limit %s""" % (
                    primaryIdColumnName, raColName, declColName, tableName, primaryIdColumnName, batchSize)
        elif cartesian:
            # SELECT THE ROWS WHERE THE HTMIds ARE NOT SET
            sqlQuery = """SELECT `%s`, `%s`, `%s` from `%s` where `%s` is not null and `%s` > 0 and ((htm10ID is NULL or cx is null)) limit %s""" % (
                primaryIdColumnName, raColName, declColName, tableName, raColName, raColName, batchSize)
        else:
            # SELECT THE ROWS WHERE THE HTMIds ARE NOT SET
            sqlQuery = """SELECT `%s`, `%s`, `%s` from `%s` where `%s` is not null and `%s` > 0 and htm10ID is NULL limit %s""" % (
                primaryIdColumnName, raColName, declColName, tableName, raColName, raColName, batchSize)
        batch = readquery(
            log=log,
            sqlQuery=sqlQuery,
            dbConn=dbConn
        )
        if reindex and len(batch):
            lastId = batch[-1][primaryIdColumnName]
        log.debug(
            """The next %(batchSize)s rows requiring HTMID information have now been selected""" % locals())

        raList = []
        decList = []
        pIdList = []
        raList[:] = [r[raColName] for r in batch]
        decList[:] = [r[declColName] for r in batch]
        pIdList[:] = [r[primaryIdColumnName] for r in batch]

        from HMpTy import htm
        mesh16 = htm.HTM(16)
        mesh13 = htm.HTM(13)
        mesh10 = htm.HTM(10)

        log.debug(
            'calculating htmIds for batch of %s rows in %s db table' % (batchSize, tableName, ))
        htm16Ids = mesh16.lookup_id(raList, decList)
        htm13Ids = mesh13.lookup_id(raList, decList)
        htm10Ids = mesh10.lookup_id(raList, decList)
        log.debug(
            'finished calculating htmIds for batch of %s rows in %s db table' % (batchSize, tableName, ))
        if cartesian:
            log.debug(
                'calculating cartesian coordinates for batch of %s rows in %s db table' % (batchSize, tableName, ))
            cx = []
            cy = []
            cz = []
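            # CONVERT EACH (RA, DEC) PAIR FROM DEGREES TO A UNIT VECTOR ON THE
            # CELESTIAL SPHERE: cx = cos(ra)cos(dec), cy = sin(ra)cos(dec),
            # cz = sin(dec)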
            for r, d in zip(raList, decList):
                r = math.radians(r)
                d = math.radians(d)
                cos_dec = math.cos(d)
                cx.append(math.cos(r) * cos_dec)
                cy.append(math.sin(r) * cos_dec)
                cz.append(math.sin(d))

            sqlQuery = ""
            for h16, h13, h10, pid, cxx, cyy, czz in zip(htm16Ids, htm13Ids, htm10Ids, pIdList, cx, cy, cz):

                sqlQuery += \
                    """UPDATE `%s` SET htm16ID=%s, htm13ID=%s, htm10ID=%s, cx=%s, cy=%s, cz=%s where `%s` = '%s';\n""" \
                    % (
                        tableName,
                        h16,
                        h13,
                        h10,
                        cxx,
                        cyy,
                        czz,
                        primaryIdColumnName,
                        pid
                    )

            log.debug(
                'finished calculating cartesian coordinates for batch of %s rows in %s db table' % (
                    batchSize, tableName, ))
        else:
            log.debug('building the sqlquery')
            updates = []
            updates[:] = ["UPDATE `%(tableName)s` SET htm16ID=%(h16)s, htm13ID=%(h13)s, htm10ID=%(h10)s where %(primaryIdColumnName)s = '%(pid)s';" % locals() for h16,
                          h13, h10, pid in zip(htm16Ids, htm13Ids, htm10Ids, pIdList)]
            sqlQuery = "\n".join(updates)
            log.debug('finished building the sqlquery')

        if len(sqlQuery):
            log.debug(
                'starting to update the HTMIds for new objects in the %s db table' % (tableName, ))
            writequery(
                log=log,
                sqlQuery=sqlQuery,
                dbConn=dbConn,
            )
            log.debug(
                'finished updating the HTMIds for new objects in the %s db table' % (tableName, ))
        else:
            log.debug(
                'no HTMIds to add to the %s db table' % (tableName, ))

        percent = float(count) * 100. / float(totalCount)
        print "%(count)s / %(totalCount)s htmIds added to %(tableName)s (%(percent)0.5f%% complete)" % locals()
        end = time.time()
        timediff = end - start
        timediff = timediff * 1000000. / float(batchSize)
        print "Update speed: %(timediff)0.2fs/1e6 rows\n" % locals()

    # APPLY INDEXES IF NEEDED
    sqlQuery = ""
    for index in ["htm10ID", "htm13ID", "htm16ID"]:
        log.debug('adding %(index)s index to %(tableName)s' % locals())
        iname = "idx_" + index
        asqlQuery = u"""
            SELECT COUNT(1) IndexIsThere FROM INFORMATION_SCHEMA.STATISTICS
                WHERE table_schema=DATABASE() AND table_name='%(tableName)s' AND index_name='%(iname)s';
        """ % locals()
        count = readquery(
            log=log,
            sqlQuery=asqlQuery,
            dbConn=dbConn
        )[0]["IndexIsThere"]

        if count == 0:
            if not len(sqlQuery):
                sqlQuery += u"""
                    ALTER TABLE %(tableName)s ADD INDEX `%(iname)s` (`%(index)s` ASC)
                """ % locals()
            else:
                sqlQuery += u""", ADD INDEX `%(iname)s` (`%(index)s` ASC)""" % locals()
    if len(sqlQuery):
        writequery(
            log=log,
            sqlQuery=sqlQuery + ";",
            dbConn=dbConn,
        )
    log.debug('finished adding indexes to %(tableName)s' % locals())

    if reindex:
        print "Re-enabling keys within the '%(tableName)s' table" % locals()
        sqlQuery = """ALTER TABLE `%(tableName)s` enable keys""" % locals()
        writequery(
            log=log,
            sqlQuery=sqlQuery,
            dbConn=dbConn
        )

    print "All HTMIds added to %(tableName)s" % locals()

    log.info('completed the ``add_htm_ids_to_mysql_database_table`` function')
    return None
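# A cartesian variant of the docstring usage above, sketched on the assumption
# that the same `dbConn` and `log` objects are available: alongside the HTM ID
# columns this also populates the unit-sphere cartesian columns (cx, cy, cz).
from HMpTy.mysql import add_htm_ids_to_mysql_database_table
add_htm_ids_to_mysql_database_table(
    raColName="raDeg",
    declColName="decDeg",
    tableName="my_big_star_table",
    dbConn=dbConn,
    log=log,
    primaryIdColumnName="primaryId",
    cartesian=True,
    batchSize=25000,
    reindex=False
)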
Example #43
0
def insert_list_of_dictionaries_into_database_tables(dbConn,
                                                     log,
                                                     dictList,
                                                     dbTableName,
                                                     uniqueKeyList=[],
                                                     dateModified=False,
                                                     batchSize=2500,
                                                     replace=False):
    """insert list of dictionaries into database tables

    **Key Arguments:**
        - ``dbConn`` -- mysql database connection
        - ``log`` -- logger
        - ``dictList`` -- list of python dictionaries to add to the database table
        - ``dbTableName`` -- name of the database table
        - ``uniqueKeyList`` -- a list of column names to append as a unique constraint on the database
        - ``dateModified`` -- add the modification date as a column in the database
        - ``batchSize`` -- batch the insert commands into *batchSize* batches
        - ``replace`` -- replace the row if a duplicate is found

    **Return:**
        - None

    **Usage:**

        .. code-block:: python

            from fundamentals.mysql import insert_list_of_dictionaries_into_database_tables
            insert_list_of_dictionaries_into_database_tables(
                dbConn=dbConn,
                log=log,
                dictList=dictList,
                dbTableName="test_insert_many",
                uniqueKeyList=["col1", "col3"],
                dateModified=False,
                batchSize=2500
            )
    """

    log.info(
        'starting the ``insert_list_of_dictionaries_into_database_tables`` function'
    )

    if len(dictList) == 0:
        log.warning(
            'the list of dictionaries to be added to the database is empty')
        return None

    if len(dictList):
        convert_dictionary_to_mysql_table(dbConn=dbConn,
                                          log=log,
                                          dictionary=dictList[0],
                                          dbTableName=dbTableName,
                                          uniqueKeyList=uniqueKeyList,
                                          dateModified=dateModified,
                                          replace=replace)

    total = len(dictList[1:])
    batches = int(total / batchSize)

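    # SLICE THE DICTIONARY LIST INTO BATCHES OF batchSize SO EACH BATCH CAN BE
    # INSERTED WITH A SINGLE MULTI-VALUE INSERT STATEMENT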
    start = 0
    end = 0
    theseBatches = []
    for i in range(batches + 1):
        end = end + batchSize
        start = i * batchSize
        thisBatch = dictList[start:end]
        theseBatches.append(thisBatch)

    totalCount = total
    count = 0

    for batch in theseBatches:
        count += len(batch)
        if count > batchSize:
            # Cursor up one line and clear line
            sys.stdout.write("\x1b[1A\x1b[2K")
        if count > totalCount:
            count = totalCount
        print "%(count)s / %(totalCount)s rows inserted into %(dbTableName)s" % locals(
        )

        inserted = False
        while inserted == False:
            theseInserts = []
            for aDict in batch:

                insertCommand, valueTuple = convert_dictionary_to_mysql_table(
                    dbConn=dbConn,
                    log=log,
                    dictionary=aDict,
                    dbTableName=dbTableName,
                    uniqueKeyList=uniqueKeyList,
                    dateModified=dateModified,
                    returnInsertOnly=True,
                    replace=replace)
                theseInserts.append(valueTuple)

            message = ""
            # log.debug('adding new data to the %s table; query: %s' %
            # (dbTableName, addValue))
            message = writequery(log=log,
                                 sqlQuery=insertCommand,
                                 dbConn=dbConn,
                                 Force=True,
                                 manyValueList=theseInserts)

            if message == "unknown column":
                # A COLUMN IS MISSING FROM THE TABLE - REGENERATE THE TABLE
                # SCHEMA FROM EACH DICTIONARY AND RETRY THE INSERT
                for aDict in batch:
                    convert_dictionary_to_mysql_table(
                        dbConn=dbConn,
                        log=log,
                        dictionary=aDict,
                        dbTableName=dbTableName,
                        uniqueKeyList=uniqueKeyList,
                        dateModified=dateModified,
                        replace=replace)
            else:
                inserted = True

    log.info(
        'completed the ``insert_list_of_dictionaries_into_database_tables`` function'
    )
    return None
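# An end-to-end sketch of the function above; `dbConn` and `log` are assumed to
# exist already and the table/column names are purely illustrative. With
# `replace=True`, rows that collide on the (col1, col3) unique key should be
# updated in place rather than ignored.
dictList = [
    {"col1": 1, "col2": "a", "col3": "x"},
    {"col1": 2, "col2": "b", "col3": "y"}
]
insert_list_of_dictionaries_into_database_tables(
    dbConn=dbConn,
    log=log,
    dictList=dictList,
    dbTableName="test_insert_many",
    uniqueKeyList=["col1", "col3"],
    dateModified=False,
    batchSize=2500,
    replace=True
)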