コード例 #1
0
    def __init__(self):
        """
        Constructor.
        """

        self.logger = SEKLogger(__name__, 'debug')
        self.mapper = MECOMapper()
        self.dupeChecker = MECODupeChecker()
        self.dbUtil = MSGDBUtil()
コード例 #2
0
class TestMECOMapper(unittest.TestCase):
    """
    Unit tests for MECO Mapper.
    """

    def setUp(self):
        self.m = MECOMapper()
        self.testColumns = self.m.dbColsMeterData.values()
        self.testLabels = self.m.dbColsMeterData.keys()
        self.testTableName = 'MeterData'
        self.config = MSGConfiger()

    def testMECOMapperCanBeInited(self):
        localMapper = MECOMapper()
        self.assertIsInstance(self.m, type(localMapper))

    def testMappingReturnsAListWithMembersMoreThanZero(self):
        self.assertTrue(len(self.testColumns) > 0)

    def test_fkey_mappings(self):
        """
        Verify fkey mappings for tables.
        """

        print "test_fkey_mappings:"
        errors = 0
        # @todo Add other fkeys here.
        fkeys = {'RegisterData': 'meter_data_id'}

        for table in self.config.insertTables:
            try:
                print self.m.dbColumnsForTable(table)['_fkey']
                if self.m.dbColumnsForTable(table)['_fkey'] != fkeys[table]:
                    errors += 1
            except:
                # The table doesn't have an fkey.
                pass
        self.assertLess(errors, 1)

    # @todo finish writing this test
    def testMappingIsValidForMeterData(self):
        """
        Compare columns that are mapped for given data labels.
        """

        # Given labels from the source data, map them to DB column names.
        srcDataLabels = ('MacID', 'MeterName', 'UtilDeviceID')

    def tearDown(self):
        pass
コード例 #3
0
    def __init__(self):
        """
        Constructor.
        """

        self.logger = MSGLogger(__name__, 'debug')
        self.mapper = MECOMapper()
        self.dupeChecker = MECODupeChecker()
        self.dbUtil = MSGDBUtil()
コード例 #4
0
class MECODBInserter(object):
    """
    Provides methods that perform insertion of MECO data.
    """
    def __init__(self):
        """
        Constructor.
        """

        self.logger = SEKLogger(__name__, 'debug')
        self.mapper = MECOMapper()
        self.dupeChecker = MECODupeChecker()
        self.dbUtil = MSGDBUtil()

    def __call__(self, param):
        print "CallableClass.__call__(%s)" % param

    def insertData(self,
                   conn,
                   tableName,
                   columnsAndValues,
                   fKeyVal=None,
                   withoutCommit=0):
        """
        Given a table name and a dictionary of column names and values,
        insert them to the DB.

        :param conn: database connection
        :param tableName: name of the db table
        :param columnsAndValues: dictionary of columns and values to be
        inserted to the db
        :param (optional) fKeyVal: an explicit foreign key value
        :param (optional) withoutCommit: a flag indicated that the insert
        will not be immediately committed
        :returns: A database cursor.
        """

        cur = conn.cursor()

        # Get a dictionary of mapped (from DB to source data) column names.
        columnDict = self.mapper.getDBColNameDict(tableName)

        dbColsAndVals = {}

        if VISUALIZE_DATA:
            print "----------" + tableName + "----------"
            print columnDict
            print columnsAndValues

        for col in columnDict.keys():

            # Use default as the value for the primary key so that the
            # private key is obtained from the predefined sequence.
            if col == '_pkey':
                if VISUALIZE_DATA:
                    print columnDict[col],  # DB col name.
                    print 'DEFAULT'
                dbColsAndVals[columnDict[col]] = 'DEFAULT'

            # For the foreign key, set the value from the given parameter.
            elif col == '_fkey':
                if VISUALIZE_DATA:
                    print columnDict[col],  # DB col name.
                    print fKeyVal
                dbColsAndVals[columnDict[col]] = fKeyVal

            else:
                if VISUALIZE_DATA:
                    print columnDict[col],  # DB col name.

                # The Register and Reading tables need to handle NULL
                # values as a special case.
                if tableName == 'Register' or tableName == 'Reading':
                    try:
                        if VISUALIZE_DATA:
                            print columnsAndValues[col]  # data source value
                        dbColsAndVals[columnDict[col]] = columnsAndValues[col]
                    except:
                        if VISUALIZE_DATA:
                            print 'NULL'
                        dbColsAndVals[columnDict[col]] = 'NULL'

                # For all other cases, simply pass the value.
                else:
                    if VISUALIZE_DATA:
                        print columnsAndValues[col]  # data source value
                    dbColsAndVals[columnDict[col]] = columnsAndValues[col]

        # Add a creation timestamp to MeterData.
        if tableName == 'MeterData':
            dbColsAndVals['created'] = 'NOW()'

        cols = []
        vals = []
        for col in dbColsAndVals.keys():
            cols.append(col)

            # DEFAULT, NULL and NOW() need to appear without quotes.
            if dbColsAndVals[col] in {'DEFAULT', 'NULL', 'NOW()'}:
                vals.append(dbColsAndVals[col])
            else:
                vals.append(
                    "'%s'" %
                    dbColsAndVals[col])  # Surround value with single quotes.

        sql = """INSERT INTO "%s" (%s) VALUES (%s)""" % (
            tableName, ','.join(cols), ','.join(vals))

        self.dbUtil.executeSQL(cur, sql)

        if withoutCommit == 0:
            try:
                conn.commit()
            except:
                self.logger.log("ERROR: Commit failed.", 'debug')

        return cur
コード例 #5
0
    def __init__(self, testing=False):
        """
        Constructor.

        :param testing: (optional) Boolean indicating if Testing Mode is on.
        """

        self.logger = SEKLogger(__name__, "silent")

        if testing:
            self.logger.log("Testing Mode is ON.", "info")

        self.debug = False
        self.configer = MSGConfiger()
        if self.configer.configOptionValue("Debugging", "debug") == True:
            self.debug = True

        self.util = MSGDBUtil()
        self.mapper = MECOMapper()
        self.connector = MSGDBConnector(testing)
        self.conn = self.connector.connectDB()
        self.filename = None
        self.fileObject = None
        self.processForInsertElementCount = 0
        self.inserter = MECODBInserter()
        self.insertDataIntoDatabase = False

        # Count number of times sections in source data are encountered.
        self.tableNameCount = {
            "SSNExportDocument": 0,
            "MeterData": 0,
            "RegisterData": 0,
            "RegisterRead": 0,
            "Tier": 0,
            "Register": 0,
            "IntervalReadData": 0,
            "Interval": 0,
            "Reading": 0,
            "IntervalStatus": 0,
            "ChannelStatus": 0,
            "EventData": 0,
            "Event": 0,
        }

        # Use this dictionary to track which channels were processed when
        # readings are being processed. this is to prevent duplicate channel
        # data from being inserted.
        self.channelProcessed = {}

        self.initChannelProcessed()

        # Tables to be inserted to.
        self.insertTables = self.configer.insertTables

        self.lastSeqVal = None
        self.fKeyVal = None
        self.lastTable = None
        self.fkDeterminer = MECOFKDeterminer()
        self.dupeChecker = MECODupeChecker()
        self.currentMeterName = None
        self.currentIntervalEndTime = None
        self.currentRegisterReadReadTime = None
        self.dupesExist = False
        self.channelDupeExists = False  # For Reading dupes.
        self.numberDupeExists = False  # For Register dupes.
        self.eventTimeDupeExists = False  # For Event dupes.
        self.commitCount = 0
        self.readingDupeOnInsertCount = 0  # For Reading dupes.
        self.registerDupeOnInsertCount = 0  # For Register dupes.
        self.eventDupeOnInsertCount = 0  # For Event dupes.
        self.dataProcessCount = 0
        self.readingDupeCheckCount = 0  # For Reading dupes.
        self.registerDupeCheckCount = 0  # For Register dupes.
        self.eventDupeCheckCount = 0  # For Event dupes.
        self.insertCount = 0
        self.cumulativeInsertCount = 0
        self.nonProcessForInsertElementCount = 0
        self.readingInsertCount = 0
        self.registerInsertCount = 0
        self.eventInsertCount = 0
        self.totalReadingInsertCount = 0
        self.totalRegisterInsertCount = 0
        self.totalEventInsertCount = 0
        self.totalReadingDupeOnInsertCount = 0
        self.totalRegisterDupeOnInsertCount = 0
        self.totalEventDupeOnInsertCount = 0
コード例 #6
0
class MECOXMLParser(object):
    """
    Parses XML for MECO data.
    """

    tableName = ""

    def __init__(self, testing=False):
        """
        Constructor.

        :param testing: (optional) Boolean indicating if Testing Mode is on.
        """

        self.logger = SEKLogger(__name__, "silent")

        if testing:
            self.logger.log("Testing Mode is ON.", "info")

        self.debug = False
        self.configer = MSGConfiger()
        if self.configer.configOptionValue("Debugging", "debug") == True:
            self.debug = True

        self.util = MSGDBUtil()
        self.mapper = MECOMapper()
        self.connector = MSGDBConnector(testing)
        self.conn = self.connector.connectDB()
        self.filename = None
        self.fileObject = None
        self.processForInsertElementCount = 0
        self.inserter = MECODBInserter()
        self.insertDataIntoDatabase = False

        # Count number of times sections in source data are encountered.
        self.tableNameCount = {
            "SSNExportDocument": 0,
            "MeterData": 0,
            "RegisterData": 0,
            "RegisterRead": 0,
            "Tier": 0,
            "Register": 0,
            "IntervalReadData": 0,
            "Interval": 0,
            "Reading": 0,
            "IntervalStatus": 0,
            "ChannelStatus": 0,
            "EventData": 0,
            "Event": 0,
        }

        # Use this dictionary to track which channels were processed when
        # readings are being processed. this is to prevent duplicate channel
        # data from being inserted.
        self.channelProcessed = {}

        self.initChannelProcessed()

        # Tables to be inserted to.
        self.insertTables = self.configer.insertTables

        self.lastSeqVal = None
        self.fKeyVal = None
        self.lastTable = None
        self.fkDeterminer = MECOFKDeterminer()
        self.dupeChecker = MECODupeChecker()
        self.currentMeterName = None
        self.currentIntervalEndTime = None
        self.currentRegisterReadReadTime = None
        self.dupesExist = False
        self.channelDupeExists = False  # For Reading dupes.
        self.numberDupeExists = False  # For Register dupes.
        self.eventTimeDupeExists = False  # For Event dupes.
        self.commitCount = 0
        self.readingDupeOnInsertCount = 0  # For Reading dupes.
        self.registerDupeOnInsertCount = 0  # For Register dupes.
        self.eventDupeOnInsertCount = 0  # For Event dupes.
        self.dataProcessCount = 0
        self.readingDupeCheckCount = 0  # For Reading dupes.
        self.registerDupeCheckCount = 0  # For Register dupes.
        self.eventDupeCheckCount = 0  # For Event dupes.
        self.insertCount = 0
        self.cumulativeInsertCount = 0
        self.nonProcessForInsertElementCount = 0
        self.readingInsertCount = 0
        self.registerInsertCount = 0
        self.eventInsertCount = 0
        self.totalReadingInsertCount = 0
        self.totalRegisterInsertCount = 0
        self.totalEventInsertCount = 0
        self.totalReadingDupeOnInsertCount = 0
        self.totalRegisterDupeOnInsertCount = 0
        self.totalEventDupeOnInsertCount = 0

    def parseXML(self, fileObject, insert=False, jobID=""):
        """
        Parse an XML file.

        :param fileObject: a file object referencing an XML file.
        :param insert: (optional) True to insert to the database | False to
        perform no
        inserts.
        :returns: String containing a concise log of parsing.
        """

        print "parseXML:"

        self.commitCount = 0
        self.insertDataIntoDatabase = insert

        parseMsg = "\nParsing XML in %s.\n" % self.filename
        sys.stderr.write(parseMsg)
        parseLog = parseMsg

        tree = ET.parse(fileObject)
        root = tree.getroot()

        parseLog += self.walkTheTreeFromRoot(root, jobID=jobID)

        return parseLog

    def tableNameForAnElement(self, element):
        """
        Get the tablename for an element.

        :param element: Element tree element.
        :returns: table name
        """

        try:
            name = re.search("\{.*\}(.*)", element.tag).group(1)
        except:
            name = None
        return name

    def processDataToBeInserted(self, columnsAndValues, currentTableName, fKeyValue, parseLog, pkeyCol, jobID=""):
        """
        This is the method that performs insertion of parsed data to the
        database. Duplicate checks are performed on the endpoints of the data
         branches.

        :param columnsAndValues: A dictionary containing columns and their
        values.
        :param currentTableName: The name of the current table.
        :param fKeyValue: The value of the foreign key.
        :param parseLog: String containing a concise log of operations.
        :param pkeyCol: Column name for the primary key.
        :param jobID: Identifier for multiprocessing process.
        :returns: A string containing the parse log.
        """

        self.dataProcessCount += 1

        # Handle a special case for duplicate reading data.
        # Intercept the duplicate reading data before insert.
        if currentTableName == "Reading":
            self.channelDupeExists = self.dupeChecker.readingBranchDupeExists(
                self.conn, self.currentMeterName, self.currentIntervalEndTime, columnsAndValues["Channel"]
            )
            self.readingDupeCheckCount += 1

        if currentTableName == "Register":
            self.numberDupeExists = self.dupeChecker.registerBranchDupeExists(
                self.conn, self.currentMeterName, self.currentRegisterReadReadTime, columnsAndValues["Number"]
            )
            self.registerDupeCheckCount += 1

        if currentTableName == "Event":
            self.eventTimeDupeExists = self.dupeChecker.eventBranchDupeExists(
                self.conn, self.currentMeterName, columnsAndValues["EventTime"]
            )
            self.eventDupeCheckCount += 1

        # Only perform an insert if there are no duplicate values
        # for the channel.
        if not self.channelDupeExists and not self.numberDupeExists and not self.eventTimeDupeExists:

            # ***********************
            # ***** INSERT DATA *****
            # ***********************
            cur = self.inserter.insertData(
                self.conn, currentTableName, columnsAndValues, fKeyVal=fKeyValue, withoutCommit=1
            )
            # The last 1 indicates don't commit. Commits are handled externally.
            self.insertCount += 1
            self.cumulativeInsertCount += 1

            # Only attempt getting the last sequence value if an insertion
            # took place.
            self.lastSeqVal = self.util.getLastSequenceID(self.conn, currentTableName, pkeyCol)
            # Store the primary key.
            self.fkDeterminer.pkValforCol[pkeyCol] = self.lastSeqVal

            if currentTableName == "Reading":
                self.readingInsertCount += 1
                self.totalReadingInsertCount += 1
            elif currentTableName == "Register":
                self.registerInsertCount += 1
                self.totalRegisterInsertCount += 1
            elif currentTableName == "Event":
                self.eventInsertCount += 1
                self.totalEventInsertCount += 1

        else:  # Don't insert into Reading or Register table if a dupe exists.

            if self.channelDupeExists:
                self.readingDupeOnInsertCount += 1
                self.totalReadingDupeOnInsertCount += 1
                if self.readingDupeOnInsertCount > 0 and self.readingDupeOnInsertCount < 2:
                    parseLog += self.logger.logAndWrite("%s:{rd-dupe==>}" % jobID)

                # Also, verify the data is equivalent to the existing record.
                matchingValues = self.dupeChecker.readingValuesAreInTheDatabase(self.conn, columnsAndValues)
                assert matchingValues == True, (
                    "Duplicate check found "
                    "non-matching values for meter"
                    " %s,"
                    " endtime %s, channel %s (%s, "
                    "%s)."
                    % (
                        self.currentMeterName,
                        self.currentIntervalEndTime,
                        columnsAndValues["Channel"],
                        columnsAndValues["RawValue"],
                        columnsAndValues["Value"],
                    )
                )

                self.channelDupeExists = False

            elif self.numberDupeExists:
                self.registerDupeOnInsertCount += 1
                self.totalRegisterDupeOnInsertCount += 1
                if self.registerDupeOnInsertCount > 0 and self.registerDupeOnInsertCount < 2:
                    parseLog += self.logger.logAndWrite("%s:{re-dupe==>}" % jobID)

                self.numberDupeExists = False

            elif self.eventTimeDupeExists:
                self.eventDupeOnInsertCount += 1
                self.totalEventDupeOnInsertCount += 1
                if self.eventDupeOnInsertCount > 0 and self.eventDupeOnInsertCount < 2:
                    parseLog += self.logger.logAndWrite("%s:{ev-dupe==>}" % jobID)

                self.eventTimeDupeExists = False

            else:
                assert True == False, "Duplicate condition does not exist."

            self.logger.log("Record not inserted for %s." % columnsAndValues, "silent")

        return parseLog

    def generateConciseLogEntries(self, jobID="", reportType=None):
        """
        Create log entries in the concise log.

        :param jobID: Identifier used to distinguish multiprocessing jobs.
        :returns: A concatenated string of log entries.
        """

        # @todo Change report type to enum type.

        log = ""
        if reportType == "FINAL":
            self.logger.log("Final report", "info")

            if (
                self.readingDupeOnInsertCount > 0
                or self.registerDupeOnInsertCount > 0
                or self.eventDupeOnInsertCount > 0
            ):
                log = self.logger.logAndWrite(
                    "%s:{%srd,%sre,%sev}"
                    % (
                        jobID,
                        self.totalReadingDupeOnInsertCount,
                        self.totalRegisterDupeOnInsertCount,
                        self.totalEventDupeOnInsertCount,
                    )
                )
            else:
                log = ""
            log += self.logger.logAndWrite("(%s)" % self.commitCount)
            log += self.logger.logAndWrite("[%s]" % self.processForInsertElementCount)
            log += self.logger.logAndWrite(
                "<%srd,%sre,%sev,%s>"
                % (
                    self.totalReadingInsertCount,
                    self.totalRegisterInsertCount,
                    self.totalEventInsertCount,
                    self.cumulativeInsertCount,
                )
            )

        elif reportType == "INTERMEDIARY":

            if (
                self.readingDupeOnInsertCount > 0
                or self.registerDupeOnInsertCount > 0
                or self.eventDupeOnInsertCount > 0
            ):
                log = self.logger.logAndWrite(
                    "%s:{%srd,%sre,%sev}"
                    % (
                        jobID,
                        self.readingDupeOnInsertCount,
                        self.registerDupeOnInsertCount,
                        self.eventDupeOnInsertCount,
                    )
                )
            else:
                log = ""
            log += self.logger.logAndWrite("(%s)" % self.commitCount)
            log += self.logger.logAndWrite("[%s]" % self.processForInsertElementCount)
            log += self.logger.logAndWrite(
                "<%srd,%sre,%sev,%s,%s>"
                % (
                    self.readingInsertCount,
                    self.registerInsertCount,
                    self.eventInsertCount,
                    self.insertCount,
                    self.cumulativeInsertCount,
                )
            )
        return log

    def resetGroupCounters(self):
        """
        Reset counters that are keeping track of groups.
        """

        self.readingDupeOnInsertCount = 0
        self.insertCount = 0
        self.readingInsertCount = 0
        self.registerDupeOnInsertCount = 0
        self.registerInsertCount = 0
        self.eventInsertCount = 0
        self.eventDupeOnInsertCount = 0

    def performTableBasedOperations(self, columnsAndValues, currentTableName, element):
        """
        Perform operations that are based on the current table.

        :param columnsAndValues
        :param currentTableName
        :param element
        """

        if currentTableName == "MeterData":
            self.currentMeterName = columnsAndValues["MeterName"]

        elif currentTableName == "Interval":
            self.currentIntervalEndTime = columnsAndValues["EndTime"]

        elif currentTableName == "RegisterRead":
            self.currentRegisterReadReadTime = columnsAndValues["ReadTime"]

        elif currentTableName == "Event":
            columnsAndValues["Event_Content"] = element.text

    def walkTheTreeFromRoot(self, root, jobID=""):
        """
        Walk an XML tree from its root node.

        :param root: The root node of an XML tree.
        :param jobID: Identifier used to distinguish multiprocessing jobs.
        :returns: String containing a concise log of parsing activity.
        """

        parseLog = ""
        walker = root.iter()

        for element, nextElement in self.getNext(walker):
            # Process every element in the tree while reading ahead to get
            # the next element.

            currentTableName = self.tableNameForAnElement(element)
            nextTableName = self.tableNameForAnElement(nextElement)
            assert currentTableName is not None, "Current table does not exist."

            # Maintain a count of tables encountered.
            self.tableNameCount[currentTableName] += 1

            columnsAndValues = {}
            it = iter(sorted(element.attrib.iteritems()))

            for item in list(it):
                # Create a dictionary of column names and values.
                columnsAndValues[item[0]] = item[1]

            if currentTableName in self.insertTables:
                # Check if the current table is one of the tables to have data
                # inserted.

                self.processForInsertElementCount += 1

                if self.debug:
                    self.logger.log("Processing table %s, next is %s." % (currentTableName, nextTableName), "debug")

                # Get the column name for the primary key.
                pkeyCol = self.mapper.dbColumnsForTable(currentTableName)["_pkey"]

                fkeyCol = None
                fKeyValue = None

                try:
                    # Get the column name for the foreign key.
                    fkeyCol = self.mapper.dbColumnsForTable(currentTableName)["_fkey"]
                except:
                    pass

                if self.debug:
                    self.logger.log("foreign key col (fkey) = %s" % fkeyCol, "debug")
                    self.logger.log("primary key col (pkey) = %s" % pkeyCol, "debug")
                    self.logger.log(columnsAndValues, "debug")

                if fkeyCol is not None:
                    # Get the foreign key value.
                    fKeyValue = self.fkDeterminer.pkValforCol[fkeyCol]

                if self.debug:
                    self.logger.log("fKeyValue = %s" % fKeyValue, "debug")

                self.performTableBasedOperations(columnsAndValues, currentTableName, element)

                if self.insertDataIntoDatabase:
                    # Data is intended to be inserted into the database.
                    parseLog = self.processDataToBeInserted(
                        columnsAndValues, currentTableName, fKeyValue, parseLog, pkeyCol, jobID=jobID
                    )

                if self.debug:
                    self.logger.log("lastSeqVal = ", self.lastSeqVal)

                if self.lastReading(currentTableName, nextTableName):
                    # The last reading set has been reached.

                    if self.debug:
                        self.logger.log("----- last reading found -----", "debug")

                    parseLog += self.generateConciseLogEntries(jobID=jobID, reportType="INTERMEDIARY")
                    self.resetGroupCounters()

                    parseLog += self.logger.logAndWrite("*")
                    self.commitCount += 1
                    self.conn.commit()

                if self.lastRegister(currentTableName, nextTableName):
                    # The last register set has been reached.

                    if self.debug:
                        self.logger.log("----- last register found -----", "debug")

        # Initial commit.
        if self.commitCount == 0:
            parseLog += self.generateConciseLogEntries(jobID=jobID, reportType="INTERMEDIARY")
        self.resetGroupCounters()

        # Final commit.
        parseLog += self.logger.logAndWrite("---")
        parseLog += self.generateConciseLogEntries(jobID=jobID, reportType="FINAL")
        self.resetGroupCounters()

        parseLog += self.logger.logAndWrite("*")
        self.commitCount += 1
        self.conn.commit()
        sys.stderr.write("\n")

        self.logger.log("Data process count = %s." % self.dataProcessCount, "info")
        self.logger.log("Reading dupe check count = %s." % self.readingDupeCheckCount, "info")
        return parseLog

    def lastReading(self, currentTable, nextTable):
        """
        Determine if the last reading is being visited.

        :param currentTable: current table being processsed.
        :param nextTable: next table to be processed.
        :returns: True if last object in Reading table was read,
        otherwise return False.
        """

        if currentTable == "Reading" and (nextTable == "MeterData" or nextTable == None):
            return True
        return False

    def lastRegister(self, currentTable, nextTable):
        """
        Determine if the last register is being visited.

        :param currentTable: current table being processsed.
        :param nextTable: next table to be processed.
        :returns: True if last object in Register table was read,
        otherwise return False.
        """

        if currentTable == "Register" and (nextTable == "MeterData" or nextTable == None):
            return True
        return False

    def getNext(self, somethingIterable, window=1):
        """
        Return the current item and next item in an iterable data structure.

        :param somethingIterable: Something that has an iterator.
        :param window: How far to look ahead in the collection.
        :returns: The current iterable value and the next iterable value.
        """

        items, nexts = tee(somethingIterable, 2)
        nexts = islice(nexts, window, None)
        return izip_longest(items, nexts)

    def initChannelProcessed(self):
        """
        Initialize the dictionary for channel processing.
        """

        self.channelProcessed = {"1": False, "2": False, "3": False, "4": False}

    def getLastElement(self, rows):
        """
        Get the last element in a collection.

        Example:
            rows = (element1, element2, element3)
            getLastElement(rows) # return element3

        :param rows: Result rows from a query.
        :returns: The last element in the collection.
        """

        for i, var in enumerate(rows):
            if i == len(rows) - 1:
                return var
コード例 #7
0
class MECODBInserter(object):
    """
    Provides methods that perform insertion of MECO data.
    """

    def __init__(self):
        """
        Constructor.
        """

        self.logger = MSGLogger(__name__, 'debug')
        self.mapper = MECOMapper()
        self.dupeChecker = MECODupeChecker()
        self.dbUtil = MSGDBUtil()

    def __call__(self, param):
        print "CallableClass.__call__(%s)" % param

    def insertData(self, conn, tableName, columnsAndValues, fKeyVal = None,
                   withoutCommit = 0):
        """
        Given a table name and a dictionary of column names and values,
        insert them to the DB.

        :param conn: database connection
        :param tableName: name of the db table
        :param columnsAndValues: dictionary of columns and values to be
        inserted to the db
        :param (optional) fKeyVal: an explicit foreign key value
        :param (optional) withoutCommit: a flag indicated that the insert
        will not be immediately committed
        :returns: A database cursor.
        """

        cur = conn.cursor()

        # Get a dictionary of mapped (from DB to source data) column names.
        columnDict = self.mapper.getDBColNameDict(tableName)

        dbColsAndVals = {}

        if VISUALIZE_DATA:
            print "----------" + tableName + "----------"
            print columnDict
            print columnsAndValues

        for col in columnDict.keys():

            # Use default as the value for the primary key so that the
            # private key is obtained from the predefined sequence.
            if col == '_pkey':
                if VISUALIZE_DATA:
                    print columnDict[col], # DB col name.
                    print 'DEFAULT'
                dbColsAndVals[columnDict[col]] = 'DEFAULT'

            # For the foreign key, set the value from the given parameter.
            elif col == '_fkey':
                if VISUALIZE_DATA:
                    print columnDict[col], # DB col name.
                    print fKeyVal
                dbColsAndVals[columnDict[col]] = fKeyVal

            else:
                if VISUALIZE_DATA:
                    print columnDict[col], # DB col name.

                # The Register and Reading tables need to handle NULL
                # values as a special case.
                if tableName == 'Register' or tableName == 'Reading':
                    try:
                        if VISUALIZE_DATA:
                            print columnsAndValues[col] # data source value
                        dbColsAndVals[columnDict[col]] = columnsAndValues[col]
                    except:
                        if VISUALIZE_DATA:
                            print 'NULL'
                        dbColsAndVals[columnDict[col]] = 'NULL'

                # For all other cases, simply pass the value.
                else:
                    if VISUALIZE_DATA:
                        print columnsAndValues[col] # data source value
                    dbColsAndVals[columnDict[col]] = columnsAndValues[col]

        # Add a creation timestamp to MeterData.
        if tableName == 'MeterData':
            dbColsAndVals['created'] = 'NOW()'

        cols = []
        vals = []
        for col in dbColsAndVals.keys():
            cols.append(col)

            # DEFAULT, NULL and NOW() need to appear without quotes.
            if dbColsAndVals[col] in {'DEFAULT', 'NULL', 'NOW()'}:
                vals.append(dbColsAndVals[col])
            else:
                vals.append("'%s'" % dbColsAndVals[
                    col]) # Surround value with single quotes.

        sql = """INSERT INTO "%s" (%s) VALUES (%s)""" % (
        tableName, ','.join(cols), ','.join(vals))

        self.dbUtil.executeSQL(cur, sql)

        if withoutCommit == 0:
            try:
                conn.commit()
            except:
                self.logger.log("ERROR: Commit failed.", 'debug')

        return cur
コード例 #8
0
 def testMECOMapperCanBeInited(self):
     localMapper = MECOMapper()
     self.assertIsInstance(self.m, type(localMapper))
コード例 #9
0
 def setUp(self):
     self.m = MECOMapper()
     self.testColumns = self.m.dbColsMeterData.values()
     self.testLabels = self.m.dbColsMeterData.keys()
     self.testTableName = 'MeterData'
     self.config = MSGConfiger()
コード例 #10
0
    def __init__(self, testing = False):
        """
        Constructor.

        :param testing: (optional) Boolean indicating if Testing Mode is on.
        """

        self.logger = SEKLogger(__name__, 'silent')

        if (testing):
            self.logger.log("Testing Mode is ON.", 'info')

        self.debug = False
        self.configer = MSGConfiger()
        if self.configer.configOptionValue("Debugging", 'debug') == True:
            self.debug = True

        self.util = MSGDBUtil()
        self.mapper = MECOMapper()
        self.connector = MSGDBConnector(testing)
        self.conn = self.connector.connectDB()
        self.filename = None
        self.fileObject = None
        self.processForInsertElementCount = 0
        self.inserter = MECODBInserter()
        self.insertDataIntoDatabase = False

        # Count number of times sections in source data are encountered.
        self.tableNameCount = {'SSNExportDocument': 0, 'MeterData': 0,
                               'RegisterData': 0, 'RegisterRead': 0, 'Tier': 0,
                               'Register': 0, 'IntervalReadData': 0,
                               'Interval': 0, 'Reading': 0, 'IntervalStatus': 0,
                               'ChannelStatus': 0, 'EventData': 0, 'Event': 0}

        # Use this dictionary to track which channels were processed when
        # readings are being processed. this is to prevent duplicate channel
        # data from being inserted.
        self.channelProcessed = {}

        self.initChannelProcessed()

        # Tables to be inserted to.
        self.insertTables = self.configer.insertTables

        self.lastSeqVal = None
        self.fKeyVal = None
        self.lastTable = None
        self.fkDeterminer = MECOFKDeterminer()
        self.dupeChecker = MECODupeChecker()
        self.currentMeterName = None
        self.currentIntervalEndTime = None
        self.currentRegisterReadReadTime = None
        self.dupesExist = False
        self.channelDupeExists = False # For Reading dupes.
        self.numberDupeExists = False # For Register dupes.
        self.eventTimeDupeExists = False # For Event dupes.
        self.commitCount = 0
        self.readingDupeOnInsertCount = 0 # For Reading dupes.
        self.registerDupeOnInsertCount = 0 # For Register dupes.
        self.eventDupeOnInsertCount = 0 # For Event dupes.
        self.dataProcessCount = 0
        self.readingDupeCheckCount = 0 # For Reading dupes.
        self.registerDupeCheckCount = 0 # For Register dupes.
        self.eventDupeCheckCount = 0 # For Event dupes.
        self.insertCount = 0
        self.cumulativeInsertCount = 0
        self.nonProcessForInsertElementCount = 0
        self.readingInsertCount = 0
        self.registerInsertCount = 0
        self.eventInsertCount = 0
        self.totalReadingInsertCount = 0
        self.totalRegisterInsertCount = 0
        self.totalEventInsertCount = 0
        self.totalReadingDupeOnInsertCount = 0
        self.totalRegisterDupeOnInsertCount = 0
        self.totalEventDupeOnInsertCount = 0
コード例 #11
0
class MECOXMLParser(object):
    """
    Parses XML for MECO data.
    """

    tableName = ''

    def __init__(self, testing = False):
        """
        Constructor.

        :param testing: (optional) Boolean indicating if Testing Mode is on.
        """

        self.logger = SEKLogger(__name__, 'silent')

        if (testing):
            self.logger.log("Testing Mode is ON.", 'info')

        self.debug = False
        self.configer = MSGConfiger()
        if self.configer.configOptionValue("Debugging", 'debug') == True:
            self.debug = True

        self.util = MSGDBUtil()
        self.mapper = MECOMapper()
        self.connector = MSGDBConnector(testing)
        self.conn = self.connector.connectDB()
        self.filename = None
        self.fileObject = None
        self.processForInsertElementCount = 0
        self.inserter = MECODBInserter()
        self.insertDataIntoDatabase = False

        # Count number of times sections in source data are encountered.
        self.tableNameCount = {'SSNExportDocument': 0, 'MeterData': 0,
                               'RegisterData': 0, 'RegisterRead': 0, 'Tier': 0,
                               'Register': 0, 'IntervalReadData': 0,
                               'Interval': 0, 'Reading': 0, 'IntervalStatus': 0,
                               'ChannelStatus': 0, 'EventData': 0, 'Event': 0}

        # Use this dictionary to track which channels were processed when
        # readings are being processed. this is to prevent duplicate channel
        # data from being inserted.
        self.channelProcessed = {}

        self.initChannelProcessed()

        # Tables to be inserted to.
        self.insertTables = self.configer.insertTables

        self.lastSeqVal = None
        self.fKeyVal = None
        self.lastTable = None
        self.fkDeterminer = MECOFKDeterminer()
        self.dupeChecker = MECODupeChecker()
        self.currentMeterName = None
        self.currentIntervalEndTime = None
        self.currentRegisterReadReadTime = None
        self.dupesExist = False
        self.channelDupeExists = False # For Reading dupes.
        self.numberDupeExists = False # For Register dupes.
        self.eventTimeDupeExists = False # For Event dupes.
        self.commitCount = 0
        self.readingDupeOnInsertCount = 0 # For Reading dupes.
        self.registerDupeOnInsertCount = 0 # For Register dupes.
        self.eventDupeOnInsertCount = 0 # For Event dupes.
        self.dataProcessCount = 0
        self.readingDupeCheckCount = 0 # For Reading dupes.
        self.registerDupeCheckCount = 0 # For Register dupes.
        self.eventDupeCheckCount = 0 # For Event dupes.
        self.insertCount = 0
        self.cumulativeInsertCount = 0
        self.nonProcessForInsertElementCount = 0
        self.readingInsertCount = 0
        self.registerInsertCount = 0
        self.eventInsertCount = 0
        self.totalReadingInsertCount = 0
        self.totalRegisterInsertCount = 0
        self.totalEventInsertCount = 0
        self.totalReadingDupeOnInsertCount = 0
        self.totalRegisterDupeOnInsertCount = 0
        self.totalEventDupeOnInsertCount = 0


    def parseXML(self, fileObject, insert = False, jobID = ''):
        """
        Parse an XML file.

        :param fileObject: a file object referencing an XML file.
        :param insert: (optional) True to insert to the database | False to
        perform no
        inserts.
        :returns: String containing a concise log of parsing.
        """

        print "parseXML:"

        self.commitCount = 0
        self.insertDataIntoDatabase = insert

        parseMsg = "\nParsing XML in %s.\n" % self.filename
        sys.stderr.write(parseMsg)
        parseLog = parseMsg

        tree = ET.parse(fileObject)
        root = tree.getroot()

        parseLog += self.walkTheTreeFromRoot(root, jobID = jobID)

        return parseLog


    def tableNameForAnElement(self, element):
        """
        Get the tablename for an element.

        :param element: Element tree element.
        :returns: table name
        """

        try:
            name = re.search('\{.*\}(.*)', element.tag).group(1)
        except:
            name = None
        return name


    def processDataToBeInserted(self, columnsAndValues, currentTableName,
                                fKeyValue, parseLog, pkeyCol, jobID = ''):
        """
        This is the method that performs insertion of parsed data to the
        database. Duplicate checks are performed on the endpoints of the data
         branches.

        :param columnsAndValues: A dictionary containing columns and their
        values.
        :param currentTableName: The name of the current table.
        :param fKeyValue: The value of the foreign key.
        :param parseLog: String containing a concise log of operations.
        :param pkeyCol: Column name for the primary key.
        :param jobID: Identifier for multiprocessing process.
        :returns: A string containing the parse log.
        """

        self.dataProcessCount += 1

        # Handle a special case for duplicate reading data.
        # Intercept the duplicate reading data before insert.
        if currentTableName == "Reading":
            self.channelDupeExists = self.dupeChecker.readingBranchDupeExists(
                self.conn, self.currentMeterName, self.currentIntervalEndTime,
                columnsAndValues['Channel'])
            self.readingDupeCheckCount += 1

        if currentTableName == "Register":
            self.numberDupeExists = self.dupeChecker.registerBranchDupeExists(
                self.conn, self.currentMeterName,
                self.currentRegisterReadReadTime, columnsAndValues['Number'])
            self.registerDupeCheckCount += 1

        if currentTableName == "Event":
            self.eventTimeDupeExists = self.dupeChecker.eventBranchDupeExists(
                self.conn, self.currentMeterName, columnsAndValues['EventTime'])
            self.eventDupeCheckCount += 1

        # Only perform an insert if there are no duplicate values
        # for the channel.
        if not self.channelDupeExists and not self.numberDupeExists and not \
            self.eventTimeDupeExists:

            # ***********************
            # ***** INSERT DATA *****
            # ***********************
            cur = self.inserter.insertData(self.conn, currentTableName,
                                           columnsAndValues,
                                           fKeyVal = fKeyValue,
                                           withoutCommit = 1)
            # The last 1 indicates don't commit. Commits are handled externally.
            self.insertCount += 1
            self.cumulativeInsertCount += 1

            # Only attempt getting the last sequence value if an insertion
            # took place.
            self.lastSeqVal = self.util.getLastSequenceID(self.conn,
                                                          currentTableName,
                                                          pkeyCol)
            # Store the primary key.
            self.fkDeterminer.pkValforCol[pkeyCol] = self.lastSeqVal

            if currentTableName == "Reading":
                self.readingInsertCount += 1
                self.totalReadingInsertCount += 1
            elif currentTableName == "Register":
                self.registerInsertCount += 1
                self.totalRegisterInsertCount += 1
            elif currentTableName == "Event":
                self.eventInsertCount += 1
                self.totalEventInsertCount += 1

        else: # Don't insert into Reading or Register table if a dupe exists.

            if (self.channelDupeExists):
                self.readingDupeOnInsertCount += 1
                self.totalReadingDupeOnInsertCount += 1
                if self.readingDupeOnInsertCount > 0 and self\
                    .readingDupeOnInsertCount < 2:
                    parseLog += self.logger.logAndWrite(
                        "%s:{rd-dupe==>}" % jobID)

                # Also, verify the data is equivalent to the existing record.
                matchingValues = self.dupeChecker.readingValuesAreInTheDatabase(
                    self.conn, columnsAndValues)
                assert matchingValues == True, "Duplicate check found " \
                                               "non-matching values for meter" \
                                               " %s," \
                                               " endtime %s, channel %s (%s, " \
                                               "%s)." % (
                                                   self.currentMeterName,
                                                   self.currentIntervalEndTime,
                                                   columnsAndValues['Channel'],
                                                   columnsAndValues['RawValue'],
                                                   columnsAndValues['Value'])

                self.channelDupeExists = False

            elif (self.numberDupeExists):
                self.registerDupeOnInsertCount += 1
                self.totalRegisterDupeOnInsertCount += 1
                if self.registerDupeOnInsertCount > 0 and self\
                    .registerDupeOnInsertCount < 2:
                    parseLog += self.logger.logAndWrite(
                        "%s:{re-dupe==>}" % jobID)

                self.numberDupeExists = False

            elif (self.eventTimeDupeExists):
                self.eventDupeOnInsertCount += 1
                self.totalEventDupeOnInsertCount += 1
                if self.eventDupeOnInsertCount > 0 and self\
                    .eventDupeOnInsertCount < 2:
                    parseLog += self.logger.logAndWrite(
                        "%s:{ev-dupe==>}" % jobID)

                self.eventTimeDupeExists = False

            else:
                assert True == False, "Duplicate condition does not exist."

            self.logger.log('Record not inserted for %s.' % columnsAndValues,
                            'silent')

        return parseLog

    def generateConciseLogEntries(self, jobID = '', reportType = None):
        """
        Create log entries in the concise log.

        :param jobID: Identifier used to distinguish multiprocessing jobs.
        :returns: A concatenated string of log entries.
        """

        # @todo Change report type to enum type.

        log = ''
        if reportType == 'FINAL':
            self.logger.log('Final report', 'info')

            if self.readingDupeOnInsertCount > 0 or self\
                .registerDupeOnInsertCount > 0 or self.eventDupeOnInsertCount\
                    > 0:
                log = self.logger.logAndWrite("%s:{%srd,%sre,%sev}" % (
                    jobID, self.totalReadingDupeOnInsertCount,
                    self.totalRegisterDupeOnInsertCount,
                    self.totalEventDupeOnInsertCount))
            else:
                log = ''
            log += self.logger.logAndWrite("(%s)" % self.commitCount)
            log += self.logger.logAndWrite(
                "[%s]" % self.processForInsertElementCount)
            log += self.logger.logAndWrite("<%srd,%sre,%sev,%s>" % (
                self.totalReadingInsertCount, self.totalRegisterInsertCount,
                self.totalEventInsertCount, self.cumulativeInsertCount))

        elif reportType == 'INTERMEDIARY':

            if self.readingDupeOnInsertCount > 0 or self\
                .registerDupeOnInsertCount > 0 or self.eventDupeOnInsertCount\
                    > 0:
                log = self.logger.logAndWrite("%s:{%srd,%sre,%sev}" % (
                    jobID, self.readingDupeOnInsertCount,
                    self.registerDupeOnInsertCount,
                    self.eventDupeOnInsertCount))
            else:
                log = ''
            log += self.logger.logAndWrite("(%s)" % self.commitCount)
            log += self.logger.logAndWrite(
                "[%s]" % self.processForInsertElementCount)
            log += self.logger.logAndWrite("<%srd,%sre,%sev,%s,%s>" % (
                self.readingInsertCount, self.registerInsertCount,
                self.eventInsertCount, self.insertCount,
                self.cumulativeInsertCount))
        return log

    def resetGroupCounters(self):
        """
        Reset counters that are keeping track of groups.
        """

        self.readingDupeOnInsertCount = 0
        self.insertCount = 0
        self.readingInsertCount = 0
        self.registerDupeOnInsertCount = 0
        self.registerInsertCount = 0
        self.eventInsertCount = 0
        self.eventDupeOnInsertCount = 0

    def performTableBasedOperations(self, columnsAndValues, currentTableName,
                                    element):
        """
        Perform operations that are based on the current table.

        :param columnsAndValues
        :param currentTableName
        :param element
        """

        if currentTableName == "MeterData":
            self.currentMeterName = columnsAndValues['MeterName']

        elif currentTableName == "Interval":
            self.currentIntervalEndTime = columnsAndValues['EndTime']

        elif currentTableName == "RegisterRead":
            self.currentRegisterReadReadTime = columnsAndValues['ReadTime']

        elif currentTableName == "Event":
            columnsAndValues['Event_Content'] = element.text


    def walkTheTreeFromRoot(self, root, jobID = ''):
        """
        Walk an XML tree from its root node.

        :param root: The root node of an XML tree.
        :param jobID: Identifier used to distinguish multiprocessing jobs.
        :returns: String containing a concise log of parsing activity.
        """

        parseLog = ''
        walker = root.iter()

        for element, nextElement in self.getNext(walker):
            # Process every element in the tree while reading ahead to get
            # the next element.

            currentTableName = self.tableNameForAnElement(element)
            nextTableName = self.tableNameForAnElement(nextElement)
            assert currentTableName is not None, "Current table does not exist."

            # Maintain a count of tables encountered.
            self.tableNameCount[currentTableName] += 1

            columnsAndValues = {}
            it = iter(sorted(element.attrib.iteritems()))

            for item in list(it):
                # Create a dictionary of column names and values.
                columnsAndValues[item[0]] = item[1]

            if currentTableName in self.insertTables:
                # Check if the current table is one of the tables to have data
                # inserted.

                self.processForInsertElementCount += 1

                if self.debug:
                    self.logger.log("Processing table %s, next is %s." % (
                        currentTableName, nextTableName), 'debug')

                # Get the column name for the primary key.
                pkeyCol = self.mapper.dbColumnsForTable(currentTableName)[
                    '_pkey']

                fkeyCol = None
                fKeyValue = None

                try:
                    # Get the column name for the foreign key.
                    fkeyCol = self.mapper.dbColumnsForTable(currentTableName)[
                        '_fkey']
                except:
                    pass

                if self.debug:
                    self.logger.log("foreign key col (fkey) = %s" % fkeyCol,
                                    'debug')
                    self.logger.log("primary key col (pkey) = %s" % pkeyCol,
                                    'debug')
                    self.logger.log(columnsAndValues, 'debug')

                if fkeyCol is not None:
                    # Get the foreign key value.
                    fKeyValue = self.fkDeterminer.pkValforCol[fkeyCol]

                if self.debug:
                    self.logger.log("fKeyValue = %s" % fKeyValue, 'debug')

                self.performTableBasedOperations(columnsAndValues,
                                                 currentTableName, element)

                if self.insertDataIntoDatabase:
                    # Data is intended to be inserted into the database.
                    parseLog = self.processDataToBeInserted(columnsAndValues,
                                                            currentTableName,
                                                            fKeyValue, parseLog,
                                                            pkeyCol,
                                                            jobID = jobID)

                if self.debug:
                    self.logger.log("lastSeqVal = ", self.lastSeqVal)

                if self.lastReading(currentTableName, nextTableName):
                    # The last reading set has been reached.

                    if self.debug:
                        self.logger.log("----- last reading found -----",
                                        'debug')

                    parseLog += self.generateConciseLogEntries(jobID = jobID,
                                                               reportType =
                                                               'INTERMEDIARY')
                    self.resetGroupCounters()

                    parseLog += self.logger.logAndWrite("*")
                    self.commitCount += 1
                    self.conn.commit()

                if self.lastRegister(currentTableName, nextTableName):
                    # The last register set has been reached.

                    if self.debug:
                        self.logger.log("----- last register found -----",
                                        'debug')


        # Initial commit.
        if self.commitCount == 0:
            parseLog += self.generateConciseLogEntries(jobID = jobID,
                                                       reportType =
                                                       'INTERMEDIARY')
        self.resetGroupCounters()

        # Final commit.
        parseLog += self.logger.logAndWrite("---")
        parseLog += self.generateConciseLogEntries(jobID = jobID,
                                                   reportType = 'FINAL')
        self.resetGroupCounters()

        parseLog += self.logger.logAndWrite("*")
        self.commitCount += 1
        self.conn.commit()
        sys.stderr.write("\n")

        self.logger.log("Data process count = %s." % self.dataProcessCount,
                        'info')
        self.logger.log(
            "Reading dupe check count = %s." % self.readingDupeCheckCount,
            'info')
        return parseLog


    def lastReading(self, currentTable, nextTable):
        """
        Determine if the last reading is being visited.

        :param currentTable: current table being processsed.
        :param nextTable: next table to be processed.
        :returns: True if last object in Reading table was read,
        otherwise return False.
        """

        if currentTable == "Reading" and (
                    nextTable == "MeterData" or nextTable == None):
            return True
        return False


    def lastRegister(self, currentTable, nextTable):
        """
        Determine if the last register is being visited.

        :param currentTable: current table being processsed.
        :param nextTable: next table to be processed.
        :returns: True if last object in Register table was read,
        otherwise return False.
        """

        if currentTable == "Register" and (
                    nextTable == "MeterData" or nextTable == None):
            return True
        return False


    def getNext(self, somethingIterable, window = 1):
        """
        Return the current item and next item in an iterable data structure.

        :param somethingIterable: Something that has an iterator.
        :param window: How far to look ahead in the collection.
        :returns: The current iterable value and the next iterable value.
        """

        items, nexts = tee(somethingIterable, 2)
        nexts = islice(nexts, window, None)
        return izip_longest(items, nexts)


    def initChannelProcessed(self):
        """
        Initialize the dictionary for channel processing.
        """

        self.channelProcessed = {'1': False, '2': False, '3': False, '4': False}


    def getLastElement(self, rows):
        """
        Get the last element in a collection.

        Example:
            rows = (element1, element2, element3)
            getLastElement(rows) # return element3

        :param rows: Result rows from a query.
        :returns: The last element in the collection.
        """

        for i, var in enumerate(rows):
            if i == len(rows) - 1:
                return var