Example 1
class MECODataAutoloader(object):
    """
    Provide automated loading of MECO energy data from exports of
    gzip-compressed XML source data.
    """
    def __init__(self):
        """
        Constructor.
        """

        self.logger = SEKLogger(__name__)
        self.configer = MSGConfiger()
        self.fileUtil = MSGFileUtil()

    def newDataExists(self):
        """
        Check the data autoload folder for the presence of new data.

        :returns: True if new data exists.
        """

        autoloadPath = self.configer.configOptionValue(
            'MECO Autoload', 'meco_autoload_new_data_path')
        if not self.fileUtil.validDirectory(autoloadPath):
            raise Exception('InvalidDirectory', '%s' % autoloadPath)

        patterns = ['*.gz']
        matchCnt = 0
        for root, dirs, filenames in os.walk(autoloadPath):
            for pat in patterns:
                for filename in fnmatch.filter(filenames, pat):
                    print filename
                    matchCnt += 1
        if matchCnt > 0:
            return True
        else:
            return False

    def loadNewData(self):
        """
        Load new data contained in the new data path.
        """

        autoloadPath = self.configer.configOptionValue(
            'MECO Autoload', 'meco_autoload_new_data_path')
        command = self.configer.configOptionValue('MECO Autoload',
                                                  'meco_autoload_command')
        os.chdir(autoloadPath)

        try:
            subprocess.check_call(command, shell=True)
        except subprocess.CalledProcessError, e:
            self.logger.log("An exception occurred: %s" % e, 'error')
class MSGWeatherDataDupeChecker(object):
    """
    Determine if a duplicate record exists based on the tuple

    (WBAN, Date, Time, StationType).
    """

    def __init__(self, testing = False):
        """
        Constructor.

        :param testing: Flag for testing mode.
        """

        self.logger = SEKLogger(__name__, 'debug')
        self.dbUtil = MSGDBUtil()


    def duplicateExists(self, dbCursor, wban, datetime, recordType):
        """
        Check for the existence of a duplicate record.

        :param dbCursor: Database cursor used to execute the query.
        :param wban: WBAN identifier of the weather station.
        :param datetime: Date and time of the record.
        :param recordType: Record type of the observation.
        :returns: True if a duplicate record exists, otherwise False.
        """

        tableName = "WeatherNOAA"
        sql = """SELECT wban, datetime, record_type FROM \"%s\" WHERE
                 wban = '%s' AND datetime = '%s' AND record_type = '%s'""" % (
            tableName, wban, datetime, recordType)

        self.logger.log("sql=%s" % sql, 'debug')
        self.logger.log("wban=%s, datetime=%s, record_type=%s" % (
            wban, datetime, recordType), 'debug')

        self.dbUtil.executeSQL(dbCursor, sql)
        rows = dbCursor.fetchall()

        if len(rows) > 0:
            return True
        else:
            return False
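
# A minimal usage sketch of the duplicate checker above, assuming a database
# connection via MSGDBConnector and an existing "WeatherNOAA" table; the WBAN,
# timestamp, and record type values are illustrative only.
checker = MSGWeatherDataDupeChecker()
cursor = MSGDBConnector().conn.cursor()
if not checker.duplicateExists(cursor, '21504', '2012-08-01 00:54', 'AA'):
    pass  # Safe to insert the new weather record.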
Example 5
class MSGTimeUtilTester(unittest.TestCase):
    def setUp(self):
        self.logger = SEKLogger(__name__, 'debug')
        self.timeUtil = MSGTimeUtil()

    def test_concise_now(self):
        conciseNow = self.timeUtil.conciseNow()
        self.logger.log(conciseNow)
        pattern = r'\d+-\d+-\d+_\d+'
        result = re.match(pattern, conciseNow)
        self.assertTrue(result is not None,
                        "conciseNow does not match the expected pattern.")

    def test_split_dates(self):
        start = dt(2014, 1, 7)
        end = dt(2014, 4, 4)
        print self.timeUtil.splitDates(start, end)
        self.assertEqual(len(self.timeUtil.splitDates(start, end)), 4,
                         'Unexpected date count.')
Example 6
class SIDataUtilTester(unittest.TestCase):
    """
    """

    def setUp(self):
        self.dataUtil = SIDataUtil()
        self.logger = SEKLogger(__name__)


    def test_find_max_timestamp(self):
        filePath = 'data/test-meter/log.csv'
        self.assertEquals(self.dataUtil.maxTimeStamp(filePath),
                          datetime.strptime('2014-03-10 23:59:00',
                                            '%Y-%m-%d %H:%M:%S'))

    def test_find_max_timestamp_db(self):
        # @todo test with a static testing DB
        meter = '001EC6051A0D'
        self.logger.log(self.dataUtil.maxTimeStampDB(meter))
Example 7
class WeatherDataLoadingTester(unittest.TestCase):
    def setUp(self):
        self.weatherUtil = MSGWeatherDataUtil()
        self.logger = SEKLogger(__name__, 'DEBUG')
        self.dbConnector = MSGDBConnector()
        self.cursor = self.dbConnector.conn.cursor()
        self.configer = MSGConfiger()


    def testLoadDataSinceLastLoaded(self):
        """
        Data should be loaded since the last data present in the database.
        """
        pass


    def testRetrieveDataSinceLastLoaded(self):
        """
        Data since the last loaded date is retrieved.
        """
        pass


    def testGetLastLoadedDate(self):
        myDate = self.weatherUtil.getLastDateLoaded(self.cursor).strftime(
            "%Y-%m-%d %H:%M:%S")
        pattern = r'^(\d+-\d+-\d+\s\d+:\d+:\d+)$'
        match = re.match(pattern, myDate)
        assert match and (match.group(1) == myDate), "Date format is invalid."


    def testWeatherDataPattern(self):
        myPattern = self.configer.configOptionValue('Weather Data',
                                                    'weather_data_pattern')
        testString = """<A HREF="someURL">QCLCD201208.zip</A>"""

        match = re.match(myPattern, testString)
        self.logger.log("pattern = %s" % myPattern, 'info')
        if match:
            self.logger.log("match = %s" % match)
            self.logger.log("match group = %s" % match.group(1))
        else:
            self.logger.log("match not found")
        assert match and match.group(
            1) == 'QCLCD201208.zip', "Download filename was not matched."


    def testWeatherDataURL(self):
        myURL = self.configer.configOptionValue('Weather Data',
                                                'weather_data_url')
        pass
class SIConfiger(object):
    """
    Supports site-level config for the Smart Grid PV Inverter project.
    The default path is ~/.smart-inverter.cfg.

    Usage:

        configer = SIConfiger()

    """
    def __init__(self):
        """
        Constructor.
        """

        self._config = ConfigParser.ConfigParser()
        self.logger = SEKLogger(__name__, 'INFO')
        self.fileUtil = SEKFileUtil()
        self.dbUtil = SEKDBUtil()
        self.cursor = None

        configFilePath = '~/.smart-inverter.cfg'

        if self.fileUtil.isMoreThanOwnerReadableAndWritable(
                os.path.expanduser(configFilePath)):
            self.logger.log(
                "Configuration file permissions are too permissive. Operation "
                "will not continue.", 'error')
            sys.exit(-1)

        try:
            self._config.read(['site.cfg', os.path.expanduser(configFilePath)])
        except:
            self.logger.log(
                "Critical error: The data in {} cannot be "
                "accessed successfully.".format(configFilePath), 'ERROR')
            sys.exit(-1)

    def configOptionValue(self, section, option):
        """
        Get a configuration value from the local configuration file.
        :param section: String of section in config file.
        :param option: String of option in config file.
        :returns: The value contained in the configuration file.
        """

        try:
            configValue = self._config.get(section, option)
            if configValue == "True":
                return True
            elif configValue == "False":
                return False
            else:
                return configValue
        except:
            self.logger.log(
                "Failed when getting configuration option {} in section {"
                "}.".format(option, section), 'error')
            sys.exit(-1)
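
# A minimal usage sketch of the configer above, assuming a readable
# ~/.smart-inverter.cfg containing a [Database] section; 'testing_mode' is a
# hypothetical option shown only to illustrate boolean handling.
configer = SIConfiger()
dbName = configer.configOptionValue('Database', 'db_name')
# Option values of "True"/"False" are returned as Python booleans.
inTesting = configer.configOptionValue('Database', 'testing_mode')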
Example 9
class MSGDBExporterTester(unittest.TestCase):
    """
    Unit tests for the MSG Cloud Exporter.
    """
    def setUp(self):
        self.logger = SEKLogger(__name__, 'DEBUG')
        self.configer = MSGConfiger()
        self.exporter = MSGDBExporter()
        self.testDir = 'db_exporter_test'
        self.uncompressedTestFilename = 'meco_v3_test_data.sql'
        self.compressedTestFilename = 'meco_v3_test_data.sql.gz'
        self.exportTestDataPath = self.configer.configOptionValue(
            'Testing', 'export_test_data_path')
        self.fileUtil = MSGFileUtil()
        self.fileChunks = []
        self.testDataFileID = ''
        self.pyUtil = MSGPythonUtil()
        self.timeUtil = MSGTimeUtil()

        conn = None
        try:
            conn = MSGDBConnector().connectDB()
        except Exception as detail:
            self.logger.log("Exception occurred: {}".format(detail), 'error')
            exit(-1)

        self.logger.log("conn = {}".format(conn), 'debug')
        self.assertIsNotNone(conn)

        # Create a temporary working directory.
        try:
            os.mkdir(self.testDir)
        except OSError as detail:
            self.logger.log(
                'Exception during creation of temp directory: %s' % detail,
                'ERROR')

    def tearDown(self):
        """
        Delete all test items.
        """

        REMOVE_TEMPORARY_FILES = True
        if REMOVE_TEMPORARY_FILES:
            try:
                self.logger.log(
                    "Removing local test files {}, {}.".format(
                        self.uncompressedTestFilename,
                        self.compressedTestFilename), 'debug')
                os.remove(
                    os.path.join(os.getcwd(), self.testDir,
                                 self.uncompressedTestFilename))
                os.remove(
                    os.path.join(os.getcwd(), self.testDir,
                                 self.compressedTestFilename))
            except OSError as detail:
                self.logger.log(
                    'Exception while removing temporary files: {}'.format(
                        detail), 'SILENT')
            try:
                os.remove(
                    os.path.join(os.getcwd(), self.testDir,
                                 self.compressedTestFilename))
            except OSError as detail:
                self.logger.log(
                    'Exception while removing temporary files: {}'.format(
                        detail), 'SILENT')
            try:
                for f in self.fileChunks:
                    os.remove(f)
            except OSError as detail:
                self.logger.log(
                    'Exception while removing temporary files: {}'.format(
                        detail), 'DEBUG')

        try:
            os.rmdir(self.testDir)
        except OSError as detail:
            self.logger.log(
                'Exception while removing directory: {}'.format(detail),
                'ERROR')

        # Keep deleting from the cloud until there are no more to delete.
        def deleteFromCloud():
            self.logger.log("deleting from cloud", 'debug')
            try:
                fileIDToDelete = self.exporter.fileIDForFileName(
                    self.compressedTestFilename)
                if fileIDToDelete is None:
                    return
                self.logger.log("file ID to delete: {}".format(fileIDToDelete),
                                'DEBUG')
                self.exporter.driveService.files().delete(
                    fileId='{}'.format(fileIDToDelete)).execute()
                deleteFromCloud()
            except (TypeError, http.HttpError) as e:
                self.logger.log('Delete not successful: {}'.format(e), 'DEBUG')

        deleteFromCloud()

    def _upload_test_data_to_cloud(self):
        """
        Provide an upload of test data that can be used in other tests.

        Side effect: Store the file ID as an ivar.
        """
        self.logger.log("Uploading test data for caller: {}".format(
            self.pyUtil.callerName()))

        filePath = "{}/{}".format(self.exportTestDataPath,
                                  self.compressedTestFilename)
        self.logger.log('Uploading {}.'.format(filePath), 'info')

        uploadResult = self.exporter.uploadFileToCloudStorage(filePath)
        self.logger.log('upload result: {}'.format(uploadResult))

        self.testDataFileID = self.exporter.fileIDForFileName(
            self.compressedTestFilename)
        self.logger.log("Test file ID is {}.".format(self.testDataFileID))

    def test_markdown_list_of_downloadable_files(self):
        """
        Match the Markdown line entry for the uploaded file.
        """
        # @REVIEWED
        self._upload_test_data_to_cloud()
        self.assertEquals(
            len(
                filter(
                    lambda x: self.testDataFileID in x,
                    self.exporter.markdownListOfDownloadableFiles().splitlines(
                    ))), 1)

    def test_get_md5_sum_from_cloud(self):
        """
        Test retrieving the MD5 sum from the cloud.
        """
        # @REVIEWED
        self.logger.log('Testing getting the MD5 sum.', 'info')
        self._upload_test_data_to_cloud()
        testFileMD5 = filter(
            lambda x: x['id'] == self.testDataFileID,
            self.exporter.cloudFiles['items'])[0]['md5Checksum']
        self.assertEquals(len(testFileMD5), 32)
        self.assertTrue(re.match(r'[0-9A-Za-z]+', testFileMD5))

    def test_get_file_id_for_nonexistent_file(self):
        """
        Test getting a file ID for a nonexistent file.
        """
        # @REVIEWED
        fileIDs = self.exporter.fileIDForFileName('nonexistent_file')
        self.logger.log("file ids = {}".format(fileIDs), 'info')
        self.assertIsNone(fileIDs)

    def test_upload_test_data(self):
        """
        Upload a test data file for unit testing of DB export.

        The unit test data file is a predefined set of test data stored in
        the test data path of the software distribution.
        """
        # @REVIEWED

        self._upload_test_data_to_cloud()
        self.assertGreater(len(self.testDataFileID), 0)
        self.assertTrue(re.match(r'[0-9A-Za-z]+', self.testDataFileID))

    def test_adding_reader_permissions(self):
        """
        Add reader permissions to a file that was uploaded.
        """
        # @REVIEWED
        self.logger.log("Testing adding reader permissions.")
        self._upload_test_data_to_cloud()

        email = self.configer.configOptionValue('Testing',
                                                'tester_email_address')
        service = self.exporter.driveService
        try:
            id_resp = service.permissions().getIdForEmail(
                email=email).execute()
            print id_resp

        except errors.HttpError as detail:
            print 'Exception while getting ID for email: {}'.format(detail)

        new_permission = {'value': email, 'type': 'user', 'role': 'reader'}
        try:
            self.logger.log('Adding reader permission', 'INFO')
            fileIDToAddTo = self.testDataFileID

            # The permission dict is being output to stdout here.
            resp = service.permissions().insert(fileId=fileIDToAddTo,
                                                sendNotificationEmails=False,
                                                body=new_permission).execute()
        except errors.HttpError as detail:
            self.logger.log(
                'Exception while adding reader permissions: {}'.format(detail),
                'error')

        def permission_id(email):
            try:
                id_resp = service.permissions().getIdForEmail(
                    email=email).execute()
                return id_resp['id']
            except errors.HttpError as error:
                self.logger.log("HTTP error: {}".format(error))

        permission = {}
        try:
            permission = service.permissions().get(
                fileId=self.testDataFileID,
                permissionId=permission_id(email)).execute()
        except errors.HttpError as error:
            self.logger.log("HTTP error: {}".format(error))

        self.assertEquals(permission['role'], 'reader')

    def test_create_compressed_archived(self):
        """
        * Copy test data to a temp directory (self.testDir).
        * Create a checksum for test data.
        * Create a gzip-compressed archive.
        * Extract gzip-compressed archive.
        * Create a checksum for the uncompressed data.
        * Compare the checksums.
        """
        # @REVIEWED

        self.logger.log('Testing verification of a compressed archive.')

        self.logger.log('cwd {}'.format(os.getcwd()))
        fullPath = '{}'.format(
            os.path.join(os.getcwd(), self.testDir,
                         self.uncompressedTestFilename))
        shutil.copyfile(
            '{}/{}'.format(self.exportTestDataPath,
                           self.uncompressedTestFilename), fullPath)

        md5sum1 = self.fileUtil.md5Checksum(fullPath)

        self.exporter.fileUtil.gzipCompressFile(fullPath)

        try:
            os.remove(
                os.path.join(os.getcwd(), self.testDir,
                             self.uncompressedTestFilename))
        except OSError as detail:
            self.logger.log('Exception while removing: {}'.format(detail),
                            'ERROR')

        # Extract archived data and generate checksum.
        src = gzip.open('{}{}'.format(fullPath, '.gz'), "rb")
        uncompressed = open(fullPath, "wb")
        decoded = src.read()
        uncompressed.write(decoded)
        uncompressed.close()

        md5sum2 = self.fileUtil.md5Checksum(fullPath)

        self.assertEqual(
            md5sum1, md5sum2, 'Checksums are not equal for original and new '
            'decompressed archive.')

    def test_export_db(self):
        """
        Perform a quick test of the DB export method using Testing Mode.

        This requires sudo authorization to complete.
        """
        # @REVIEWED

        self.logger.log('Testing exportDB using the testing DB.')

        # @todo handle case where testing db does not exist.

        dbs = ['test_meco']
        ids = self.exporter.exportDBs(databases=dbs,
                                      toCloud=True,
                                      localExport=True)
        self.logger.log('Count of exports: {}'.format(len(ids)))
        self.assertEquals(len(ids), 1, "Count of exported files is wrong.")

        map(self.exporter.deleteFile, ids)

    def test_split_archive(self):
        """
        Test splitting an archive into chunks.
        """
        # @REVIEWED
        self.logger.log('Testing archive splitting.')
        fullPath = '{}/{}'.format(self.exportTestDataPath,
                                  self.compressedTestFilename)
        self.logger.log('fullpath: {}'.format(fullPath))
        shutil.copyfile(
            fullPath, '{}/{}'.format(self.testDir,
                                     self.compressedTestFilename))
        fullPath = '{}/{}'.format(self.testDir, self.compressedTestFilename)

        self.fileChunks = self.fileUtil.splitLargeFile(fullPath=fullPath,
                                                       numChunks=3)
        self.assertEquals(len(self.fileChunks), 3)

    def test_get_file_size(self):
        """
        Test retrieving local file sizes.
        """
        # @REVIEWED
        fullPath = '{}/{}'.format(self.exportTestDataPath,
                                  self.compressedTestFilename)
        fSize = self.fileUtil.fileSize(fullPath)
        self.logger.log('size: {}'.format(fSize))
        self.assertEqual(fSize, 12279, 'Unexpected file size.')

    def test_upload_export_files_list(self):
        """
        TBW
        """
        pass

    def test_checksum_after_upload(self):
        """
        TBW
        """
        pass

    def test_dump_exclusions_dictionary(self):
        """
        Verify the exclusions dictionary by its type.
        :return:
        """
        # @REVIEWED
        exclusions = self.exporter.dumpExclusionsDictionary()

        if exclusions:
            self.assertEquals(type({}), type(exclusions))

    def test_move_to_final(self):
        """
        Test moving a file to the final destination path.
        """
        # @REVIEWED
        self.logger.log('Testing moving to final path {}.'.format(
            self.configer.configOptionValue('Export', 'db_export_final_path')))

        origCompressedFile = '{}/{}'.format(
            self.configer.configOptionValue('Export', 'export_test_data_path'),
            self.compressedTestFilename)
        newCompressedFile = '{}/{}'.format(
            self.configer.configOptionValue('Export', 'export_test_data_path'),
            'temp_test_file')

        shutil.copyfile(origCompressedFile, newCompressedFile)

        self.exporter.moveToFinalPath(compressedFullPath=newCompressedFile)

        self.assertTrue(
            os.path.isfile('{}/{}'.format(
                self.configer.configOptionValue('Export',
                                                'db_export_final_path'),
                'temp_test_file')))

        # Remove the test file.
        os.remove('{}/{}'.format(
            self.configer.configOptionValue('Export', 'db_export_final_path'),
            'temp_test_file'))

    def test_log_successful_export(self):
        """
        Test logging of export results to the export history table.
        """
        # @REVIEWED
        self.assertTrue(
            self.exporter.logSuccessfulExport(name='test_export',
                                              url='http://test_url',
                                              datetime=0,
                                              size=100))

        conn = MSGDBConnector().connectDB()
        cursor = conn.cursor()
        dbUtil = MSGDBUtil()

        self.assertTrue(
            dbUtil.executeSQL(
                cursor, 'select * from "ExportHistory" where '
                'timestamp = '
                'to_timestamp(0)'))

        self.assertEqual(len(cursor.fetchall()), 1,
                         "There should only be one result row.")

        self.assertTrue(
            dbUtil.executeSQL(
                cursor, 'delete from "ExportHistory" where '
                'timestamp = to_timestamp(0)'))
        conn.commit()

    def test_metadata_of_file_id(self):
        """
        Test getting the metadata for a file ID.
        """
        # @REVIEWED
        self._upload_test_data_to_cloud()

        self.logger.log('metadata: {}'.format(
            self.exporter.metadataOfFileID(self.testDataFileID)))

        self.assertTrue(re.match(r'[0-9A-Za-z]+', self.testDataFileID))

    def test_filename_for_file_id(self):
        """
        Test returning a file name given a file ID.
        """
        # @REVIEWED
        self._upload_test_data_to_cloud()
        self.assertEquals(
            self.exporter.filenameForFileID(fileID=self.testDataFileID),
            self.compressedTestFilename)

    def test_outdated_files(self):
        # @REVIEWED
        self._upload_test_data_to_cloud()
        time.sleep(1)
        self.logger.log("outdated:")

        # For debugging:
        for item in self.exporter.outdatedFiles(
                daysBeforeOutdated=datetime.timedelta(days=-1)):
            self.logger.log(
                "name: {}, created date: {}".format(item['originalFilename'],
                                                    item['createdDate']),
                'debug')

        # Get all the outdated files where outdated is equal to anything
        # uploaded today or later.
        self.assertTrue(
            self.exporter.outdatedFiles(daysBeforeOutdated=datetime.timedelta(
                days=-1))[0]['id'] == self.testDataFileID)

        self.logger.log('-----')

    def test_delete_outdated(self):
        """
        TBW
        """
        pass

    def test_list_of_downloadable_files(self):
        """
        Test the list of downloadable files used by the available files page.
        """
        # @REVIEWED
        self._upload_test_data_to_cloud()
        self.assertEquals(
            len(
                filter(lambda row: row['id'] == self.testDataFileID,
                       self.exporter.listOfDownloadableFiles())), 1,
            "Test file not present.")

    def test_count_of_db_exports(self):
        count = self.exporter.countOfDBExports(EARLIEST_DATE)
        self.logger.log(count, 'DEBUG')
        self.assertTrue(int(count) or int(count) == int(0))

    def test_count_of_cloud_files(self):
        count = self.exporter.countOfCloudFiles()
        self.assertTrue(int(count) or int(count) == int(0))

    def test_plaintext_list_of_downloadable_files(self):
        """
        This test handles content both with content links and without content
        links.
        """
        content = self.exporter.plaintextListOfDownloadableFiles()
        self.assertRegexpMatches(
            content, '\d+-\d+-\d+.*\,'
            '\s+\d+-\d+-\d+T\d+:\d+:\d+\.\d+Z\,\s+\d+\sB')

    def test_last_report_date(self):
        last_report = self.exporter.notifier.lastReportDate(
            types=MSGNotificationHistoryTypes,
            noticeType=MSGNotificationHistoryTypes.MSG_EXPORT_SUMMARY)
        self.assertTrue(last_report is None or last_report > EARLIEST_DATE)

    def test_current_export_summary(self):
        self.assertRegexpMatches(
            self.exporter.currentExportSummary(),
            re.compile('last.*databases.*free.*currently.*accessed.*',
                       flags=re.IGNORECASE | re.DOTALL))
class MSGDataAggregatorTester(unittest.TestCase):
    """
    Unit tests for MSG Data Aggregator.

    """

    def setUp(self):
        """
        Constructor.
        """
        self.logger = SEKLogger(__name__, "DEBUG")
        self.aggregator = MSGDataAggregator()
        self.testStart = "2014-01-02 11:59"
        self.testEnd = "2014-01-02 12:14"
        self.rawTypes = ["weather", "egauge", "circuit", "irradiance"]

    def testIrradianceFetch(self):
        """
        Test raw data fetching over the testing time interval.
        :return:
        """

        timeCol = "timestamp"
        rows = []
        for row in self.aggregator.rawData(
            dataType="irradiance",
            orderBy=[timeCol, "sensor_id"],
            timestampCol=timeCol,
            startDate=self.testStart,
            endDate=self.testEnd,
        ):
            rows.append(row)
        self.assertIsNotNone(rows, "Rows are present.")

    def testWeatherFetch(self):
        """
        Test raw data fetching over the testing time interval.
        :return:
        """

        timeCol = "timestamp"
        rows = []
        for row in self.aggregator.rawData(
            dataType="weather", orderBy=[timeCol], timestampCol=timeCol, startDate=self.testStart, endDate=self.testEnd
        ):
            rows.append(row)
        self.assertIsNotNone(rows, "Rows are present.")

    def testCircuitFetch(self):
        """
        Test raw data fetching over the testing time interval.
        :return:
        """

        timeCol = "timestamp"
        rows = []
        for row in self.aggregator.rawData(
            dataType="circuit",
            orderBy=[timeCol, "circuit"],
            timestampCol=timeCol,
            startDate=self.testStart,
            endDate=self.testEnd,
        ):
            rows.append(row)
        self.assertIsNotNone(rows, "Rows are present.")

    def testEgaugeFetch(self):
        """
        Test raw data fetching over the testing time interval.
        :return:
        """

        timeCol = "datetime"
        rows = []
        for row in self.aggregator.rawData(
            dataType="egauge",
            orderBy=[timeCol, "egauge_id"],
            timestampCol=timeCol,
            startDate=self.testStart,
            endDate=self.testEnd,
        ):
            rows.append(row)
        self.assertIsNotNone(rows, "Rows are present.")

    def testEgaugeAggregation(self):
        """
        Perform aggregation over the testing time interval.
        :return:
        """

        self.logger.log("Testing Egauge aggregation.")
        rowCnt = 0
        agg = self.aggregator.aggregatedData(
            dataType="egauge",
            aggregationType="agg_egauge",
            timeColumnName="datetime",
            subkeyColumnName="egauge_id",
            startDate=self.testStart,
            endDate=self.testEnd,
        )
        print [col for col in agg.columns]
                msgBody += 'The new data count for type {} is {} readings' \
                           '.\n'.format(result[i].keys()[0],
                                        result[i].values()[0])
            msgBody += '\n\n'
            msgBody += 'The last report date was %s.' % lastReportDate
            msgBody += '\n\n'
        self.notifier.sendNotificationEmail(msgBody, testing = testing)
        self.notifier.recordNotificationEvent(
            MSGNotificationHistoryTypes.MSG_DATA_AGGREGATOR)


    def aggregateNewData(self):
        """
        :return: list of dicts obtained from
        MSGDataAggregator::aggregateNewData.
        """

        result = map(self.aggregator.aggregateNewData, self.rawTypes)

        self.logger.log('result {}'.format(result))
        return result


if __name__ == '__main__':
    aggregator = NewDataAggregator()
    logger = SEKLogger(__name__)
    logger.log('Last report date {}'.format(aggregator.notifier.lastReportDate(
        MSGNotificationHistoryTypes.MSG_DATA_AGGREGATOR)))
    result = aggregator.aggregateNewData()
    aggregator.sendNewDataNotification(result = result, testing = False)
Example 12
class SingleFileLoaderTester(unittest.TestCase):
    def setUp(self):
        self.logger = SEKLogger(__name__, 'DEBUG')
        self.configer = SIConfiger()
        self.conn = SEKDBConnector(
            dbName=self.configer.configOptionValue('Database', 'db_name'),
            dbHost=self.configer.configOptionValue('Database', 'db_host'),
            dbPort=self.configer.configOptionValue('Database', 'db_port'),
            dbUsername=self.configer.configOptionValue('Database',
                                                       'db_username'),
            dbPassword=self.configer.configOptionValue(
                'Database', 'db_password')).connectDB()
        self.cursor = self.conn.cursor()
        self.dbUtil = SEKDBUtil()
        self.dataUtil = SIDataUtil()
        self.inserter = SingleFileLoader('data/test-meter/log.csv')
        self.data = '"2014-07-12 16:22:30",0,,,1187488464896.00,' \
                    '2322185846784.00,1134697381888.00,35184644096.00,' \
                    '290857353216.00,10133100822528.00,367.13,' \
                    '-17660932096.00,1078.01,17660934144.00,-7.86,1.80,8.06,' \
                    '-0.97,244.01,122.00,32.93,60.01,-7.09,1.42,7.24,8.06,' \
                    '3.34,8.35,-40.18,-5.68,40.52,516.72,403.12,0,' \
                    '8797179904.00,47518.67,0,86.03,50.23,4198.40,' \
                    '281475022848.00,2251868602368.00,0,6820.01,' \
                    '8796095488.00,0,178.83,188.30,0,620.07,505.19,' \
                    '288230389841920.02,12668.18,68729384.00,0,-3.68,-4.18,,' \
                    '1.00,0.79,,3.81,4.25,,-0.97,-0.98,,244.01,,,121.54,' \
                    '122.46,,31.28,34.59,'
        self.testMeterName = 'test-meter'

    def test_columns(self):
        self.assertEquals(len(self.dataUtil.dbColumns), NUM_DB_COLS)

    def test_insert_data(self):
        self.logger.log('testing data insert')
        self.assertTrue(self.inserter.insertData(self.data))
        self.conn.commit()

    def test_sql_formatted_values(self):
        self.logger.log('data: {}'.format(
            self.dataUtil.sqlFormattedValues(self.data)))

    def test_meter_id(self):
        self.logger.log('testing meter id')
        meter_id = self.inserter.meterID(self.testMeterName)
        self.logger.log('meter id {}'.format(meter_id))
        self.assertTrue(isinstance(meter_id, (int, long)))
        self.logger.log('getting meter id')
        sql = 'SELECT meter_id FROM "Meters" WHERE meter_name = \'{}\''.format(
            self.testMeterName)
        success = self.dbUtil.executeSQL(self.cursor, sql, exitOnFail=True)
        if success:
            result = self.cursor.fetchall()
            self.assertEquals(1, len(result))
        else:
            self.logger.log('failed to retrieve meter id', 'error')

    def test_meter_name(self):
        """
        Test getting the meter name.
        :return:
        """
        self.assertEquals(self.inserter.meterName(), self.testMeterName)

    def test_insert_data_from_file(self):
        self.inserter.insertDataFromFile()
        sql = 'SELECT * FROM "MeterData" WHERE meter_id = {}'.format(
            self.inserter.meterID(self.testMeterName))
        success = self.dbUtil.executeSQL(self.cursor, sql, exitOnFail=True)
        if success:
            result = self.cursor.fetchall()
            self.assertEquals(len(result), 10)

        self.assertTrue(success)

    def tearDown(self):
        self.logger.log('teardown', 'debug')
        sql = 'SELECT meter_id FROM "Meters" WHERE meter_name = \'{}\''.format(
            self.testMeterName)
        success = self.dbUtil.executeSQL(self.cursor, sql, exitOnFail=True)
        if success:
            result = self.cursor.fetchall()
            if len(result) == 1:
                sql = 'DELETE FROM "Meters" WHERE meter_id = {}'.format(
                    result[0][0])
                success = self.dbUtil.executeSQL(self.cursor,
                                                 sql,
                                                 exitOnFail=True)
                if success:
                    self.conn.commit()
                sql = 'SELECT meter_id FROM "Meters" WHERE meter_name = \'{' \
                      '}\''.format(self.testMeterName)
                success = self.dbUtil.executeSQL(self.cursor,
                                                 sql,
                                                 exitOnFail=True)
                result = self.cursor.fetchall()
                self.assertEquals(0, len(result))
Example 13
class MSGFileUtil(object):
    """
    Utilities related to files and directories.
    """
    def __init__(self):
        """
        Constructor.
        """
        self.logger = SEKLogger(__name__, 'DEBUG')

    def validDirectory(self, path):
        """
        Verify that the path is a valid directory.

        :param path: Path to check.
        :returns: True if path is a valid directory.
        """

        if os.path.exists(path) and os.path.isdir(path):
            return True
        else:
            return False

    def md5Checksum(self, fullPath):
        """
        Get the MD5 checksum for the file given by fullPath.

        :param fullPath: Full path of the file for which to generate a
        checksum.
        :returns: MD5 checksum value as a hex digest.
        """

        try:
            f = open(fullPath, mode='rb')
            content = hashlib.md5()
            for buf in iter(partial(f.read, 128), b''):
                content.update(buf)
            md5sum = content.hexdigest()
            f.close()
            return md5sum
        except IOError as detail:
            self.logger.log(
                'Exception during checksum calculation: %s' % detail, 'ERROR')

    def gzipUncompressFile(self, srcPath, destPath):
        """
        Gzip-uncompress the file at srcPath, writing the result to destPath.

        @todo Need to deal with large file sizes. Stop reading into memory.

        :param srcPath: Full path of the file to be uncompressed.
        :param destPath: Full path of file to be written to.
        """

        self.logger.log(
            'Uncompressing gzip source %s to %s' % (srcPath, destPath),
            'DEBUG')
        gzipFile = gzip.open(srcPath, "rb")
        uncompressedFile = open(destPath, "wb")
        decoded = gzipFile.read()
        try:
            uncompressedFile.write(decoded)
        except:
            self.logger.log("Exception while writing uncompressed file.")
        gzipFile.close()
        uncompressedFile.close()

    def gzipCompressFile(self, fullPath):
        """
        Perform gzip compression on a file at fullPath.

        @todo Generalize this method.

        :param fullPath: Full path of the file to be compressed.
        :returns: Boolean: True if successful, False otherwise.
        """

        success = False
        self.logger.log('Gzip compressing %s.' % fullPath)
        try:
            f_in = open('%s' % (fullPath), 'rb')
            f_out = gzip.open('%s.gz' % (fullPath), 'wb')
            f_out.writelines(f_in)
            f_out.close()
            f_in.close()
            success = True
        except IOError as detail:
            self.logger.log('IOError exception while gzipping: %s' % detail,
                            'ERROR')
        return success

    def splitFile(self, fullPath='', chunkSize=0):
        """
        @DEPRECATED

        Split a file into chunks. Write output files to base path of the
        input file.

        Adapted from https://gist.github.com/mattiasostmar/7883550.

        :param fullPath:
        :param chunkSize:
        :returns: A list of file chunks in full path form.
        """

        fChunks = []
        basePath = os.path.dirname(fullPath)
        baseName = os.path.basename(fullPath)
        self.logger.log('basename: %s' % baseName)

        f = open(fullPath, 'rb')
        data = f.read()
        f.close()

        bytes = len(data)

        # Ensure splitting doesn't happen if it's not needed.
        if bytes <= chunkSize:
            return [fullPath]

        chunkNames = []

        fCnt = 0
        for i in range(0, bytes + 1, chunkSize):
            fn1 = "%s/%s.%s" % (basePath, baseName, fCnt)
            self.logger.log("Splitter writing to %s" % fn1, 'INFO')
            fChunks.append(fn1)
            chunkNames.append(fn1)

            try:
                f = open(fn1, 'wb')
                f.write(data[i:i + chunkSize])
                f.close()
            except Exception as detail:
                print "Exception during writing split file: %s" % detail

            fCnt += 1

        return fChunks

    def splitLargeFile(self, fullPath='', numChunks=0, chunkSize=0):
        """
        Split a large file into chunks.

        :param fullPath: String
        :param numChunks: Int number of files to be split into.
        :param chunkSize: @DEPRECATED
        :return: List of file chunk names in full path form.
        """

        fChunks = []
        basePath = os.path.dirname(fullPath)
        baseName = os.path.basename(fullPath)
        self.logger.log('basename: %s' % baseName)

        # Splitting is not needed for zero or one chunk; return early to
        # avoid a division by zero when computing the chunk size.
        if numChunks == 0 or numChunks == 1:
            return [fullPath]

        fp = open(fullPath, 'rb')
        fsize = os.path.getsize(fullPath)
        chunkSize = int(float(fsize) / float(numChunks))
        totalBytes = 0

        self.logger.log('chunk size: %s' % chunkSize)

        for x in range(numChunks):

            if x == numChunks - 1:
                chunkSize = fsize - totalBytes

            data = fp.read(chunkSize)
            totalBytes += len(data)
            fout = open("%s/%s.%s" % (basePath, baseName, x), "wb")
            self.logger.log('Writing %s/%s.%s' % (basePath, baseName, x),
                            'debug')
            fChunks.append("%s/%s.%s" % (basePath, baseName, x))

            fout.write(data)
            fout.close()

        return fChunks

    def fileSize(self, fullPath=''):
        """
        Get the size in bytes for the file at fullPath.

        :param fullPath:
        :return: size in bytes
        """

        return os.path.getsize(fullPath)
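
# A minimal usage sketch of the file utilities above; the paths are
# hypothetical and used only for illustration.
fileUtil = MSGFileUtil()
if fileUtil.validDirectory('/tmp/export'):
    md5sum = fileUtil.md5Checksum('/tmp/export/dump.sql')
    if fileUtil.gzipCompressFile('/tmp/export/dump.sql'):
        # Writes /tmp/export/dump.sql.gz; split it into three chunks.
        chunks = fileUtil.splitLargeFile(fullPath='/tmp/export/dump.sql.gz',
                                         numChunks=3)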
class MSGDBExporter(object):
    """
    Export MSG DBs as SQL scripts.

    Supports export to local storage and to cloud storage.

    Usage:

    from msg_db_exporter import MSGDBExporter
    exporter = MSGDBExporter()

    Public API:

    exportDBs(databases:List,
              toCloud:Boolean,
              localExport:Boolean,
              testing:Boolean,
              chunkSize:Integer,
              deleteOutdated:Boolean): Export a list of DBs to the cloud.
    """

    # List of cloud files.
    @property
    def cloudFiles(self):
        self._cloudFiles = self.driveService.files().list().execute()
        return self._cloudFiles

    @property
    def driveService(self):
        if self._driveService:
            return self._driveService

        if not self.credentialPath:
            raise Exception("Credential path is required.")
        storage = Storage(
            '{}/google_api_credentials'.format(self.credentialPath))

        self.googleAPICredentials = storage.get()

        self.logger.log("Authorizing credentials.", 'info')
        http = httplib2.Http()
        http = self.googleAPICredentials.authorize(http)

        self.logger.log("Authorized.", 'info')

        self._driveService = build('drive', 'v2', http = http)

        return self._driveService


    def __init__(self):
        """
        Constructor.
        """

        self.logger = SEKLogger(__name__, 'DEBUG', useColor = False)
        self.timeUtil = MSGTimeUtil()
        self.configer = MSGConfiger()
        self.fileUtil = MSGFileUtil()
        self.pythonUtil = MSGPythonUtil()  # for debugging
        self.connector = MSGDBConnector()
        self.conn = self.connector.connectDB()
        self.cursor = self.conn.cursor()
        self.dbUtil = MSGDBUtil()
        self.notifier = SEKNotifier(connector = self.connector,
                                    dbUtil = self.dbUtil,
                                    user = self.configer.configOptionValue(
                                        'Notifications', 'email_username'),
                                    password = self.configer.configOptionValue(
                                        'Notifications', 'email_password'),
                                    fromaddr = self.configer.configOptionValue(
                                        'Notifications', 'email_from_address'),
                                    toaddr = self.configer.configOptionValue(
                                        'Notifications', 'email_recipients'),
                                    testing_toaddr =
                                    self.configer.configOptionValue(
                                        'Notifications',
                                        'testing_email_recipients'),
                                    smtp_server_and_port =
                                    self.configer.configOptionValue(
                                        'Notifications',
                                        'smtp_server_and_port'))

        # Google Drive parameters.
        self.clientID = self.configer.configOptionValue('Export',
                                                        'google_api_client_id')
        self.clientSecret = self.configer.configOptionValue('Export',
                                                            'google_api_client_secret')
        self.oauthScope = 'https://www.googleapis.com/auth/drive'
        self.oauthConsent = 'urn:ietf:wg:oauth:2.0:oob'
        self.googleAPICredentials = ''
        self.exportTempWorkPath = self.configer.configOptionValue('Export',
                                                                  'db_export_work_path')

        self.credentialPath = self.configer.configOptionValue('Export',
                                                              'google_api_credentials_path')
        self.credentialStorage = Storage(
            '{}/google_api_credentials'.format(self.credentialPath))

        self._driveService = None
        self._cloudFiles = None
        self.postAgent = 'Maui Smart Grid 1.0.0 DB Exporter'
        self.retryDelay = 10
        self.availableFilesURL = ''


    def verifyExportChecksum(self, testing = False):
        """
        Verify the compressed export file using a checksum.

        * Save the checksum of the original uncompressed export data.
        * Extract the compressed file.
        * Verify the uncompressed export data.

        :param testing: When set to True, Testing Mode is used.
        """

        # Get the checksum of the original file.
        md5sum = self.fileUtil.md5Checksum(self.exportTempWorkPath)
        self.logger.log('md5sum: {}'.format(md5sum))


    def db_username(self):
        return "postgres"
        # return self.configer.configOptionValue('Database', 'db_username')

    def db_password(self):
        return self.configer.configOptionValue('Database', 'db_password')

    def db_port(self):
        return self.configer.configOptionValue('Database', 'db_port')


    def dumpCommand(self, db = '', dumpName = ''):
        """
        This method builds a command of the form

        sudo -u postgres pg_dump -p ${PORT}
                                 -U ${USERNAME}
                                 [-T ${OPTIONAL_TABLE_EXCLUSIONS}]
                                 ${DB_NAME} >
                                 ${EXPORT_TEMP_WORK_PATH}/${DUMP_TIMESTAMP}_${DB_NAME}.sql

        :param db: String
        :param dumpName: String
        :return: String of command used to export DB.
        """

        # For reference only:
        # Password is passed from ~/.pgpass.
        # Note that ':' and '\' characters should be escaped with '\'.
        # Ref: http://www.postgresql.org/docs/9.1/static/libpq-pgpass.html

        # Dump databases as the superuser. This method does not require a
        # stored password when running under a root crontab.
        if not db or not dumpName:
            raise Exception('DB and dumpname required.')

        # Process exclusions.

        exclusions = self.dumpExclusionsDictionary()
        excludeList = []
        # Guard against a None exclusions dictionary before testing membership.
        if exclusions and db in exclusions:
            excludeList = exclusions[db]
        excludeString = ''
        if len(excludeList) > 0:
            for e in excludeList:
                excludeString += """-T '"{}"' """.format(e)

        return 'sudo -u postgres pg_dump -p {0} -U {1} {5} {2} > {3}/{4}' \
               '.sql'.format(self.db_port(), self.db_username(), db,
                             self.exportTempWorkPath, dumpName, excludeString)
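
    # For illustration, with port 5432, database 'test_meco', an excluded
    # table "SomeTable", and a hypothetical work path and dump name, the
    # command built above resembles:
    #
    #   sudo -u postgres pg_dump -p 5432 -U postgres -T '"SomeTable"' \
    #       test_meco > /msg/export/work/2014-04-04_1200_test_meco.sql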


    def dumpExclusionsDictionary(self):
        """
        :param db: String of DB name for which to retrieve exclusions.
        :return: Dictionary with keys as DBs and values as lists of tables to
        be excluded for a given database.
        """
        try:
            if type(eval(self.configer.configOptionValue('Export',
                                                         'db_export_exclusions'))) == type(
                    {}):
                return eval(self.configer.configOptionValue('Export',
                                                            'db_export_exclusions'))
            else:
                return None
        except SyntaxError as detail:
            self.logger.log(
                'SyntaxError exception while getting exclusions: {}'.format(
                    detail))


    def dumpName(self, db = ''):
        """
        :param db: String
        :return: String of file name used for dump file of db.
        """
        if not db:
            raise Exception('DB required.')
        return "{}_{}".format(self.timeUtil.conciseNow(), db)


    def filesToUpload(self, compressedFullPath = '', numChunks = 0,
                      chunkSize = 0):
        """
        :param compressedFullPath: String
        :param numChunks: Int
        :param chunkSize: Int
        :return: List of files to be uploaded according to their split
        sections, if applicable.
        """
        if numChunks != 0:
            self.logger.log('Splitting {}'.format(compressedFullPath), 'DEBUG')

            filesToUpload = self.fileUtil.splitLargeFile(
                fullPath = compressedFullPath, chunkSize = chunkSize,
                numChunks = numChunks)

            if not filesToUpload:
                raise Exception('Exception during file splitting.')
            else:
                self.logger.log('to upload: {}'.format(filesToUpload), 'debug')
                return filesToUpload

        else:
            return [compressedFullPath]


    def dumpResult(self, db = '', dumpName = '', fullPath = ''):
        """
        :param dumpName: String of filename of dump file.
        :param fullPath: String of full path to dump file.
        :return: Boolean True if dump operation was successful, otherwise False.
        """

        success = True

        self.logger.log('fullPath: {}'.format(fullPath), 'DEBUG')

        try:
            # Generate the SQL script export.
            # @todo check return value of dump command
            self.logger.log('cmd: {}'.format(
                self.dumpCommand(db = db, dumpName = dumpName)))
            subprocess.check_call(
                self.dumpCommand(db = db, dumpName = dumpName), shell = True)
        except subprocess.CalledProcessError as error:
            self.logger.log("Exception while dumping: {}".format(error))
            sys.exit(-1)

        return success


    def exportDBs(self, databases = None, toCloud = False, localExport = True,
                  testing = False, chunkSize = 0, deleteOutdated = False):
        """
        Export a set of DBs to local storage and, optionally, to cloud storage.

        :param databases: List of database names that will be exported.
        :param toCloud: Boolean if set to True, then the export will also be
        copied to cloud storage.
        :param localExport: Boolean when set to True the DB is exported
        locally.
        :param testing: Boolean flag for testing mode. (@DEPRECATED)
        :param chunkSize: Integer size in bytes of chunk size used for
        splitting.
        :param deleteOutdated: Boolean indicating outdated files in the cloud
        should be removed.
        :returns: List of file IDs of uploaded files or None if there is an
        error condition.
        """

        # @todo separate uploading and exporting functions

        noErrors = True
        uploaded = []

        for db in databases:
            self.logger.log('Exporting {} using pg_dump.'.format(db), 'info')

            dumpName = self.dumpName(db = db)
            fullPath = '{}/{}.sql'.format(self.exportTempWorkPath, dumpName)
            if localExport:
                noErrors = self.dumpResult(db, dumpName, fullPath)

            # Perform compression of the file.
            self.logger.log("Compressing {} using gzip.".format(db), 'info')
            self.logger.log('fullpath: {}'.format(fullPath), 'DEBUG')

            gzipResult = self.fileUtil.gzipCompressFile(fullPath)
            compressedFullPath = '{}{}'.format(fullPath, '.gz')
            numChunks = self.numberOfChunksToUse(compressedFullPath)

            # Gzip uncompress and verify by checksum is disabled until a more
            # efficient, non-memory-based, uncompress is implemented.
            # md5sum1 = self.fileUtil.md5Checksum(fullPath)
            # self.md5Verification(compressedFullPath=compressedFullPath,
            # fullPath=fullPath,md5sum1=md5sum1)

            if toCloud:
                # Split compressed files into a set of chunks to improve the
                # reliability of uploads.

                # Upload the files to the cloud.
                for f in self.filesToUpload(
                        compressedFullPath = compressedFullPath,
                        numChunks = numChunks, chunkSize = chunkSize):
                    self.logger.log('Uploading {}.'.format(f), 'info')
                    fileID = self.uploadFileToCloudStorage(fullPath = f,
                                                           testing = testing,
                                                           retryCount = int(
                                                               self.configer.configOptionValue(
                                                                   'Export',
                                                                   'export_retry_count')))

                    self.logger.log('file id after upload: {}'.format(fileID))

                    if fileID != None:
                        uploaded.append(fileID)
                        self.logger.log('uploaded: {}'.format(uploaded),
                                        'DEBUG')
                        if not self.addReaders(fileID,
                                               self.configer.configOptionValue(
                                                       'Export',
                                                       'reader_permission_email_addresses').split(
                                                       ','), retryCount = int(
                                        self.configer.configOptionValue(
                                                'Export',
                                                'export_retry_count'))):
                            self.logger.log(
                                'Failed to add readers for {}.'.format(f),
                                'error')
                        self.logSuccessfulExport(*self.metadataOfFileID(fileID))

                    # Remove split sections if they exist.
                    try:
                        if not testing and numChunks > 1:
                            self.logger.log('Removing {}'.format(f))
                            os.remove('{}'.format(f))
                    except OSError as error:
                        self.logger.log(
                            'Exception while removing {}: {}.'.format(f, error))
                        noErrors = False

            # End if toCloud.

            if gzipResult:
                self.moveToFinalPath(compressedFullPath = compressedFullPath)

            # Remove the uncompressed file.
            try:
                if not testing:
                    self.logger.log('Removing {}'.format(fullPath))
                    os.remove('{}'.format(fullPath))
            except OSError as error:
                self.logger.log(
                    'Exception while removing {}: {}.'.format(fullPath, error))
                noErrors = False

        # End for db in databases.

        if deleteOutdated:
            self.deleteOutdatedFiles(datetime.timedelta(days = int(
                self.configer.configOptionValue('Export',
                                                'export_days_to_keep'))))

        return uploaded if noErrors else None


    def moveToFinalPath(self, compressedFullPath = ''):
        """
        Move a compressed file to the final export path.
        :param compressedFullPath: String for the compressed file.
        :return:
        """
        self.logger.log('Moving {} to final path.'.format(compressedFullPath),
                        'debug')
        try:
            shutil.move(compressedFullPath,
                        self.configer.configOptionValue('Export',
                                                        'db_export_final_path'))
        except Exception as detail:
            self.logger.log(
                'Exception while moving {} to final export path: {}'.format(
                    compressedFullPath, detail), 'error')


    def md5Verification(self, compressedFullPath = '', fullPath = '',
                        md5sum1 = ''):
        """
        Perform md5 verification of a compressed file at compressedFullPath
        where the original file is at fullPath and has md5sum1.

        :param compressedFullPath: String
        :param fullPath: String
        :param md5sum1: String of md5sum of source file.
        :return:
        """

        GZIP_UNCOMPRESS_FILE = False
        if GZIP_UNCOMPRESS_FILE:
            # Verify the compressed file by uncompressing it and
            # verifying its
            # checksum against the original checksum.
            self.logger.log('reading: {}'.format(compressedFullPath), 'DEBUG')
            self.logger.log('writing: {}'.format(os.path.join(
                self.configer.configOptionValue('Testing',
                                                'export_test_data_path'),
                os.path.splitext(os.path.basename(fullPath))[0])), 'DEBUG')

            self.fileUtil.gzipUncompressFile(compressedFullPath, os.path.join(
                self.configer.configOptionValue('Testing',
                                                'export_test_data_path'),
                fullPath))

        VERIFY_BY_CHECKSUM = False
        if VERIFY_BY_CHECKSUM:
            md5sum2 = self.fileUtil.md5Checksum(fullPath)

            self.logger.log("mtime: {}, md5sum2: {}".format(
                time.ctime(os.path.getmtime(fullPath)), md5sum2), 'INFO')

            if md5sum1 == md5sum2:
                self.logger.log(
                    'Compressed file has been validated by checksum.', 'INFO')
            else:
                self.logger.log(
                    'Checksum verification failed for the compressed file.',
                    'ERROR')

    def numberOfChunksToUse(self, fullPath):
        """
        Return the number of chunks to be used by the file splitter based on
        the file size of the file at fullPath.
        :param fullPath: String
        :returns: Int Number of chunks to create.
        """

        fsize = os.path.getsize(fullPath)
        self.logger.log('fullpath: {}, fsize: {}'.format(fullPath, fsize))
        if (fsize >= int(self.configer.configOptionValue('Export',
                                                         'max_bytes_before_split'))):
            # Note that this does not make use of the remainder in the division.
            chunks = int(fsize / int(self.configer.configOptionValue('Export',
                                                                     'max_bytes_before_split')))
            self.logger.log('Will split with {} chunks.'.format(chunks))
            return chunks
        self.logger.log('Will NOT split file.', 'debug')
        return 1


    def uploadFileToCloudStorage(self, fullPath = '', retryCount = 0,
                                 testing = False):
        """
        Export a file to cloud storage.

        :param fullPath: String of file to be exported.
        :param testing: Boolean when set to True, Testing Mode is used.
        :param retryCount: Int of number of times to retry the upload if
        there is a failure.
        :returns: String file ID verified on upload; None if verification
        fails.
        """

        success = True
        myFile = os.path.basename(fullPath)

        self.logger.log(
            'full path {}'.format(os.path.dirname(fullPath)), 'DEBUG')
        self.logger.log("Uploading {}.".format(myFile))

        result = {}
        try:
            media_body = MediaFileUpload(fullPath,
                                         mimetype =
                                         'application/gzip-compressed',
                                         resumable = True)
            body = {'title': myFile,
                    'description': 'Hawaii Smart Energy Project gzip '
                                   'compressed DB export.',
                    'mimeType': 'application/gzip-compressed'}

            # Result is a Files resource.
            result = self.driveService.files().insert(body = body,
                                                      media_body =
                                                      media_body).execute()

        except Exception as detail:
            # Upload failures can result in a BadStatusLine.
            self.logger.log(
                "Exception while uploading {}: {}.".format(myFile, detail),
                'error')
            success = False

        if not self.__verifyMD5Sum(fullPath, self.fileIDForFileName(myFile)):
            self.logger.log('Failed MD5 checksum verification.', 'INFO')
            success = False

        if success:
            self.logger.log('Verification by MD5 checksum succeeded.', 'INFO')
            self.logger.log("Finished.")
            return result['id']

        if not success and retryCount <= 0:
            return None
        else:
            time.sleep(self.retryDelay)
            self.logger.log('Retrying upload of {}.'.format(fullPath),
                            'warning')
            return self.uploadFileToCloudStorage(fullPath = fullPath,
                                                 testing = testing,
                                                 retryCount = retryCount - 1)


    def __retrieveCredentials(self):
        """
        Perform authorization at the server.

        Credentials are loaded into the object attribute googleAPICredentials.
        """

        flow = OAuth2WebServerFlow(self.clientID, self.clientSecret,
                                   self.oauthScope, self.oauthConsent)
        authorize_url = flow.step1_get_authorize_url()
        print 'Go to the following link in your browser: ' + authorize_url
        code = raw_input('Enter verification code: ').strip()
        self.googleAPICredentials = flow.step2_exchange(code)

        print "refresh_token = {}".format(
            self.googleAPICredentials.refresh_token)
        print "expiry = {}".format(self.googleAPICredentials.token_expiry)


    def freeSpace(self):
        """
        Get free space from the drive service.

        :returns: Int of free space (bytes) on the drive service.
        """
        aboutData = self.driveService.about().get().execute()
        return int(aboutData['quotaBytesTotal']) - int(
            aboutData['quotaBytesUsed']) - int(
            aboutData['quotaBytesUsedInTrash'])


    def deleteFile(self, fileID = ''):
        """
        Delete the file with ID fileID.
        :param fileID: String of a Google API file ID.
        """

        if not fileID:
            raise Exception("File ID has not been given.")

        self.logger.log(
            'Deleting file with file ID {} and name {}.'.format(fileID,
                                                                self.filenameForFileID(
                                                                    fileID)),
            'debug')

        try:
            # Writing the fileId arg name is required here.
            self.driveService.files().delete(fileId = fileID).execute()

        except errors.HttpError as error:
            self.logger.log('Exception while deleting: {}'.format(error),
                            'error')


    def deleteOutdatedFiles(self, maxAge = datetime.timedelta(weeks = 9999999)):
        """
        Remove outdated files from cloud storage.

        :param minAge: datetime.timedelta of the minimum age before a file is
        considered outdated.
        :param maxAge: datetime.timedelta of the maximum age to consider for
        a file.
        :returns: Int count of deleted items.
        """

        # @todo Return count of actual successfully deleted files.

        outdated = self.outdatedFiles(maxAge)

        """:type : dict"""
        for f in outdated:
            self.deleteFile(f['id'])

        return len(outdated)


    def outdatedFiles(self,
                      daysBeforeOutdated = datetime.timedelta(days = 9999999)):
        """
        Outdated files in the cloud where they are outdated if their age is
        greater than or equal to daysBeforeOutdated.

        Note: When t1 is the same day as t2, the timedelta comes back as -1.
        Not sure why this isn't represented as zero. Perhaps to avoid a false
        evaluation of a predicate on a tdelta.

        :param daysBeforeOutdated: datetime.timedelta where the value
        indicates that outdated files that have an age greater than this
        parameter.
        :return: Int count of deleted items.
        """

        t1 = lambda x: datetime.datetime.strptime(x['createdDate'],
                                                  "%Y-%m-%dT%H:%M:%S.%fZ")
        t2 = datetime.datetime.now()

        return filter(lambda x: t2 - t1(x) >= daysBeforeOutdated,
                      self.cloudFiles['items'])


    def sendNotificationOfFiles(self):
        """
        Provide a notification that lists the export files along with sharing
        links.
        """

        pass


    def sendDownloadableFiles(self):
        """
        Send available files via HTTP POST.
        :returns: None
        """

        myPath = '{}/{}'.format(self.exportTempWorkPath,
                                'list-of-downloadable-files.txt')

        content = self.markdownListOfDownloadableFiles()

        # Write the list to a local file and also keep it in a buffer for
        # the HTTP POST below.
        fp = open(myPath, 'wb')
        fp.write(content)
        fp.close()

        output = StringIO()
        output.write(content)

        headers = {'User-Agent': self.postAgent, 'Content-Type': 'text/html'}
        try:
            r = requests.post(self.configer.configOptionValue('Export',
                                                              'export_list_post_url'),
                              output.getvalue(), headers = headers)
            print 'text: {}'.format(r.text)
        except requests.exceptions.SSLError as error:
            # @todo Implement alternative verification.
            self.logger.log('SSL error: {}'.format(error), 'error')

        output.close()


    def metadataOfFileID(self, fileID = ''):
        """
        :param fileID: String of a file ID in the cloud.
        :return: Tuple of metadata (name, url, timestamp, size) for a given
        file ID.
        """
        item = [i for i in self.cloudFiles['items'] if i['id'] == fileID][0]
        return (item[u'originalFilename'], item[u'webContentLink'],
                item[u'createdDate'], item[u'fileSize'])


    def listOfDownloadableFiles(self):
        """
        Create a list of downloadable files.
        :returns: List of dicts of files that are downloadable from the cloud.
        """

        files = []
        for i in reversed(sorted(self.cloudFiles['items'],
                                 key = lambda k: k['createdDate'])):
            item = dict()
            item['title'] = i['title']
            item['webContentLink'] = i['webContentLink']
            item['id'] = i['id']
            item['createdDate'] = i['createdDate']
            item['fileSize'] = i['fileSize']
            files.append(item)
        return files


    def markdownListOfDownloadableFiles(self):
        """
        Generate content containing a list of downloadable files in Markdown
        format.

        :returns: String content in Markdown format.
        """

        content = "||*Name*||*Created*||*Size*||\n"
        for i in self.listOfDownloadableFiles():
            content += "||[`{}`]({})".format(i['title'], i['webContentLink'])
            content += "||`{}`".format(i['createdDate'])
            content += "||`{} B`||".format(int(i['fileSize']))
            content += '\n'

        # self.logger.log('content: {}'.format(content))
        return content


    def plaintextListOfDownloadableFiles(self):
        """
        Generate content containing a list of downloadable files in plaintext
        format.

        :returns: String content as plaintext.
        """
        content = ''
        includeLink = False
        for i in reversed(sorted(self.cloudFiles['items'],
                                 key = lambda k: k['createdDate'])):
            if includeLink:
                content += "{}, {}, {}, {} B\n".format(i['title'],
                                                       i['webContentLink'],
                                                       i['createdDate'],
                                                       int(i['fileSize']))
            else:
                content += "{}, {}, {} B\n".format(i['title'], i['createdDate'],
                                                   int(i['fileSize']))

        return content


    def logSuccessfulExport(self, name = '', url = '', datetime = 0, size = 0):
        """
        When an export has been successful, log information about the export
        to the database.

        The items to log include:
        * filename
        * URL
        * timestamp
        * filesize

        :param name: String
        :param url: String
        :param datetime: Timestamp of the export; 0 maps to to_timestamp(0).
        :param size: Int
        :return: True if no errors occurred, else False.
        """

        def exportHistoryColumns():
            return ['name', 'url', 'timestamp', 'size']

        timestamp = lambda dt: 'to_timestamp(0)' if dt == 0 else \
            "timestamp '{}'".format(dt)

        sql = 'INSERT INTO "{0}" ({1}) VALUES ({2}, {3}, {4}, {5})'.format(
            self.configer.configOptionValue('Export', 'export_history_table'),
            ','.join(exportHistoryColumns()), "'" + name + "'", "'" + url + "'",
            timestamp(datetime), size)

        conn = MSGDBConnector().connectDB()
        cursor = conn.cursor()
        dbUtil = MSGDBUtil()
        result = dbUtil.executeSQL(cursor, sql, exitOnFail = False)
        conn.commit()
        return result


    def sendExportSummary(self, summary = ''):
        """
        Send a summary of exports via email to a preconfigured list of
        recipients.
        :param summary: String of summary content.
        :return:
        """
        try:
            if self.notifier.sendNotificationEmail(summary, testing = False):
                self.notifier.recordNotificationEvent(
                    types = MSGNotificationHistoryTypes,
                    noticeType = MSGNotificationHistoryTypes.MSG_EXPORT_SUMMARY)
        except Exception as detail:
            self.logger.log('Exception occurred: {}'.format(detail), 'ERROR')


    def currentExportSummary(self):
        """
        Current summary of exports since the last summary report time.

        Summaries are reported with identifier MSG_EXPORT_SUMMARY in the
        NotificationHistory.

        Includes:
        * Number of databases exported
        * Total number of files in the cloud.
        * A report of available storage capacity.
        * A list of available DBs.
        * A link where exports can be accessed.

        :return: String of summary text.
        """
        availableFilesURL = self.configer.configOptionValue('Export',
                                                            'export_list_url')
        lastReportDate = self.notifier.lastReportDate(
            types = MSGNotificationHistoryTypes,
            noticeType = MSGNotificationHistoryTypes.MSG_EXPORT_SUMMARY)
        content = 'Cloud Export Summary:\n\n'
        content += 'Last report date: {}\n'.format(lastReportDate)

        # @TO BE REVIEWED: Verify time zone adjustment.
        content += '{} databases have been exported since the last report ' \
                   'date.\n'.format(self.countOfDBExports(
            lastReportDate + datetime.timedelta(
                hours = 10)) if lastReportDate else self.countOfDBExports())

        content += '{} B free space is available.\n'.format(self.freeSpace())
        content += '\nCurrently available DBs:\n'
        content += self.plaintextListOfDownloadableFiles()
        content += '\n{} files can be accessed through Google Drive (' \
                   'https://drive.google.com) or at {}.'.format(
            self.countOfCloudFiles(), availableFilesURL)

        return content


    def countOfDBExports(self, since = None):
        """
        :param since: datetime indicating last export datetime.
        :return: Int of count of exports.
        """
        myDatetime = lambda x: datetime.datetime.strptime(x, '%Y-%m-%d %H:%M')
        if not since:
            since = myDatetime('1900-01-01 00:00')
        self.logger.log(since.strftime('%Y-%m-%d %H:%M'), 'DEBUG')

        sql = 'SELECT COUNT("public"."ExportHistory"."timestamp") FROM ' \
              '"public"."ExportHistory" WHERE "timestamp" > \'{}\''.format(
            since.strftime('%Y-%m-%d %H:%M'))

        conn = MSGDBConnector().connectDB()
        cursor = conn.cursor()
        dbUtil = MSGDBUtil()
        rows = None
        if dbUtil.executeSQL(cursor, sql, exitOnFail = False):
            rows = cursor.fetchall()
        assert rows is not None and len(rows) == 1, 'Invalid return value.'
        return rows[0][0]


    def countOfCloudFiles(self):
        """
        :return: Int count of files currently in cloud storage.
        """
        return len(self.cloudFiles['items'])


    def __verifyMD5Sum(self, localFilePath, remoteFileID):
        """
        Verify that the local MD5 sum matches the MD5 sum for the remote file
        corresponding to an ID.

        This verifies that the uploaded file matches the local compressed
        export file.

        :param localFilePath: String of the full path of the local file.
        :param remoteFileID: String of the cloud ID for the remote file.
        :returns: Boolean True if the MD5 sums match, otherwise, False.
        """

        self.logger.log('remote file ID: {}'.format(remoteFileID))
        self.logger.log('local file path: {}'.format(localFilePath))

        # Get the md5sum for the local file.
        f = open(localFilePath, mode = 'rb')
        fContent = hashlib.md5()
        for buf in iter(partial(f.read, 128), b''):
            fContent.update(buf)
        localMD5Sum = fContent.hexdigest()
        f.close()

        self.logger.log('local md5: {}'.format(localMD5Sum), 'DEBUG')

        def verifyFile():
            # Get the MD5 sum for the remote file and compare it with the
            # local sum.
            for item in self.cloudFiles['items']:
                if item['id'] == remoteFileID:
                    self.logger.log(
                        'remote md5: {}'.format(item['md5Checksum']), 'DEBUG')
                    return localMD5Sum == item['md5Checksum']
            return False

        try:
            return verifyFile()

        except errors.HttpError as detail:
            self.logger.log(
                'HTTP error during MD5 verification: {}'.format(detail),
                'error')

            time.sleep(10)

            return verifyFile()


    def fileIDForFileName(self, filename):
        """
        Get the file ID for the given filename.

        Multiple cloud files may share the same filename; this method returns
        the ID of only one matching file, so it would need to be called
        repeatedly to obtain all of the file IDs for a given filename.

        :param filename: String of the filename for which to retrieve the ID.
        :returns: String of a cloud file ID or None if no match.
        """
        fileIDList = filter(lambda x: x['originalFilename'] == filename,
                            self.cloudFiles['items'])
        return fileIDList[0]['id'] if len(fileIDList) > 0 else None


    def filenameForFileID(self, fileID = ''):
        """
        :param fileID: String of cloud-based file ID.
        :return: String of filename for a given file ID.
        """
        return filter(lambda x: x['id'] == fileID, self.cloudFiles['items'])[0][
            'originalFilename']


    def addReaders(self, fileID = None, emailAddressList = None,
                   retryCount = 0):
        """
        Add reader permission to an export file that has been uploaded to the
        cloud for the given list of email addresses.

        Email notification is suppressed by default.

        :param fileID: String of the cloud file ID to be processed.
        :param emailAddressList: List of email addresses.
        :param retryCount: Int of number of times to retry adding readers if
        there is a failure.
        :returns: Boolean True if successful, otherwise False.
        """
        # @todo Provide support for retry count
        success = True

        self.logger.log('file id: {}'.format(fileID))
        self.logger.log('address list: {}'.format(emailAddressList))

        for addr in emailAddressList:
            permission = {'value': addr, 'type': 'user', 'role': 'reader'}

            if fileID:
                try:
                    resp = self.driveService.permissions().insert(
                        fileId = fileID, sendNotificationEmails = False,
                        body = permission).execute()
                    self.logger.log(
                        'Reader permission added for {}.'.format(addr))
                except errors.HttpError as error:
                    self.logger.log('An error occurred: {}'.format(error))
                    success = False

        if not success and retryCount <= 0:
            return False
        elif success:
            return True
        else:
            time.sleep(self.retryDelay)
            self.logger.log('Retrying adding readers for ID {}.'.format(fileID),
                            'warning')
            return self.addReaders(fileID = fileID,
                                   emailAddressList = emailAddressList,
                                   retryCount = retryCount - 1)
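

# Hedged sketch, not part of MSGDBExporter: the chunk-count arithmetic that
# numberOfChunksToUse performs before splitting a large compressed export.
# This standalone version rounds up so the remainder bytes get a chunk of
# their own; maxBytesBeforeSplit stands in for the 'max_bytes_before_split'
# config option.
import math


def chunkCount(fileSizeBytes, maxBytesBeforeSplit):
    """
    Return the number of chunks to split a file of fileSizeBytes into when
    maxBytesBeforeSplit is the per-chunk byte limit.
    """

    if fileSizeBytes < maxBytesBeforeSplit:
        return 1
    return int(math.ceil(float(fileSizeBytes) / maxBytesBeforeSplit))


if __name__ == '__main__':
    # A 2.5 GB dump with a 1 GB threshold yields 3 chunks; the floor
    # division used in numberOfChunksToUse would yield 2.
    print(chunkCount(2500 * 1024 ** 2, 1024 ** 3))
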
Esempio n. 15
class MSGNotifier(object):
    """
    Provides notification service functionality for MSG data processing.

    Email settings are stored in the local configuration.

    Usage:

    from msg_notifier import MSGNotifier
    self.notifier = MSGNotifier()

    Public API:

    sendNotificationEmail(msgBody, testing = False):
        Send msgBody as a notification to the mailing list defined in the
        config file.

    sendMailWithAttachments(msgBody, files = None, testing = False)
        Send msgBody with files attached as a notification to the mailing
        list defined in the config file.

    lastReportDate(noticeType):
        The last date where a notification of the given type was reported.

    recordNotificationEvent(noticeType):
        Record an event in the notification history.
    """


    def __init__(self):
        """
        Constructor.
        """

        warnings.simplefilter('default')
        warnings.warn("This module is deprecated in favor of SEKNotifier.",
                      DeprecationWarning)

        self.config = MSGConfiger()
        self.logger = SEKLogger(__name__, 'info')
        self.connector = MSGDBConnector()
        self.conn = self.connector.connectDB()
        self.cursor = self.conn.cursor()
        self.dbUtil = MSGDBUtil()
        self.noticeTable = 'NotificationHistory'
        self.notificationHeader = "This is a message from the Hawaii Smart " \
                                  "Energy Project MSG Project notification " \
                                  "system.\n\n"

        self.noReplyNotice = '\n\nThis email account is not monitored. No ' \
                             'replies will originate from this ' \
                             'account.\n\nYou are receiving this message ' \
                             'because you are on the recipient list for ' \
                             'notifications for the Hawaii Smart Energy ' \
                             'Project.'


    def sendNotificationEmail(self, msgBody, testing = False):
        """
        This method is an alternative to the multipart method in
        sendMailWithAttachments.

        :param msgBody: The body of the message to be sent.
        :param testing: True if running in testing mode.
        :returns: True for success, False for an error.
        """

        errorOccurred = False
        user = self.config.configOptionValue('Notifications', 'email_username')
        password = self.config.configOptionValue('Notifications',
                                                 'email_password')
        fromaddr = self.config.configOptionValue('Notifications',
                                                 'email_from_address')

        if testing:
            toaddr = self.config.configOptionValue('Notifications',
                                                   'testing_email_recipients')
        else:
            toaddr = self.config.configOptionValue('Notifications',
                                                   'email_recipients')
        server = smtplib.SMTP(self.config.configOptionValue('Notifications',
                                                            'smtp_server_and_port'))

        try:
            server.starttls()
        except smtplib.SMTPException as detail:
            errorOccurred = True
            self.logger.log("Exception during SMTP STARTTLS: {}".format(detail),
                            'ERROR')

        try:
            server.login(user, password)
        except smtplib.SMTPException as detail:
            errorOccurred = True
            self.logger.log("Exception during SMTP login: %s" % detail, 'ERROR')

        senddate = datetime.now().strftime('%Y-%m-%d')
        subject = "HISEP Notification"

        msgHeader = "Date: {}\r\nFrom: {}\r\nTo: {}\r\nSubject: {" \
                    "}\r\nX-Mailer: My-Mail\r\n\r\n".format(senddate, fromaddr,
                                                            toaddr, subject)

        msgBody = self.notificationHeader + msgBody

        msgBody += self.noReplyNotice

        try:
            self.logger.log("Send email notification.", 'INFO')
            server.sendmail(fromaddr, toaddr, msgHeader + msgBody)
            server.quit()
        except smtplib.SMTPException as detail:
            errorOccurred = True
            self.logger.log("Exception during SMTP sendmail: {}".format(detail),
                            'ERROR')

        return not errorOccurred


    def sendMailWithAttachments(self, msgBody, files = None, testing = False):
        """
        Send email along with attachments.

        :param msgBody: String containing the body of the message to send.
        :param files: List of file paths. This is a mutable argument that
        should be handled carefully as the default is defined only once.
        :param testing: True if running in testing mode.
        :returns: True if an exception occurred during sending, otherwise
        False.
        """

        if files is None:
            files = []

        sys.stderr.write("Sending multipart email.\n")
        if testing:
            self.logger.log("Notification testing mode is ON.\n", 'info')

        errorOccurred = False
        assert type(files) == list

        user = self.config.configOptionValue('Notifications', 'email_username')
        password = self.config.configOptionValue('Notifications',
                                                 'email_password')

        if testing:
            send_to = self.config.configOptionValue('Notifications',
                                                    'testing_email_recipients')
        else:
            send_to = self.config.configOptionValue('Notifications',
                                                    'email_recipients')

        send_from = self.config.configOptionValue('Notifications',
                                                  'email_from_address')

        msg = MIMEMultipart()
        msg['From'] = send_from
        msg['To'] = send_to
        msg['Date'] = formatdate(localtime = True)
        msg['Subject'] = "HISEP Notification"

        msg.attach(MIMEText(msgBody))

        for f in files:
            sys.stderr.write("Attaching file %s.\n" % f)
            part = MIMEBase('application', "octet-stream")
            part.set_payload(open(f, "rb").read())
            Encoders.encode_base64(part)
            part.add_header('Content-Disposition',
                            'attachment; filename="%s"' % os.path.basename(f))
            msg.attach(part)

        server = smtplib.SMTP(self.config.configOptionValue('Notifications',
                                                            'smtp_server_and_port'))
        try:
            server.starttls()
        except smtplib.SMTPException as detail:
            errorOccurred = True
            self.logger.log("Exception during SMTP STARTTLS: %s" % detail,
                            'ERROR')

        try:
            server.login(user, password)
        except smtplib.SMTPException as detail:
            errorOccurred = True
            self.logger.log("Exception during SMTP login: %s" % detail, 'ERROR')

        self.logger.log("Send email notification.", 'INFO')

        try:
            server.sendmail(send_from, send_to, msg.as_string())
        except smtplib.SMTPException as detail:
            errorOccurred = True
            self.logger.log("Exception during SMTP sendmail: %s" % detail,
                            'ERROR')

        server.quit()

        if not errorOccurred:
            self.logger.log('No exceptions occurred.\n', 'info')

        return errorOccurred


    def recordNotificationEvent(self, noticeType = None):
        """
        Save a notification event to the notification history.
        :param noticeType: <enum 'MSGNotificationHistoryTypes'>
        :returns: Boolean
        """

        if not noticeType:
            return False
        if not noticeType in MSGNotificationHistoryTypes:
            return False

        cursor = self.cursor
        sql = """INSERT INTO "{}" ("notificationType", "notificationTime")
        VALUES ('{}', NOW())""".format(self.noticeTable, noticeType.name)
        success = self.dbUtil.executeSQL(cursor, sql)
        self.conn.commit()
        if not success:
            raise Exception('Exception while saving the notification time.')
        return success


    def lastReportDate(self, noticeType = None):
        """
        Get the last time a notification was reported for the given
        noticeType.

        :param noticeType: MSGNotificationHistoryTypes member indicating the
        type of the notification. It is stored in the event history.
        :returns: datetime of last report date.
        """

        if not noticeType or (not noticeType in MSGNotificationHistoryTypes):
            raise Exception('Invalid notice type.')

        cursor = self.cursor

        sql = 'SELECT MAX("notificationTime") FROM "{}" WHERE ' \
              '"notificationType" = \'{}\''.format(self.noticeTable,
                                                   noticeType.name)

        success = self.dbUtil.executeSQL(cursor, sql)
        if success:
            rows = cursor.fetchall()

            if not rows[0][0]:
                return None
            else:
                return rows[0][0]
        else:
            raise Exception('Exception during getting last report date.')
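

# Hedged sketch, separate from MSGNotifier: the bare SMTP flow used by
# sendNotificationEmail above (STARTTLS, login, sendmail, quit), reduced to a
# standalone function. host, user, password, and the addresses are
# placeholders rather than values from the project configuration.
import smtplib


def sendPlainNotification(host, user, password, fromAddr, toAddr, subject,
                          body):
    """
    Send a single plain-text message and return True on success.
    """

    msgHeader = "From: {}\r\nTo: {}\r\nSubject: {}\r\n\r\n".format(fromAddr,
                                                                   toAddr,
                                                                   subject)
    try:
        server = smtplib.SMTP(host)
        server.starttls()
        server.login(user, password)
        server.sendmail(fromAddr, toAddr, msgHeader + body)
        server.quit()
        return True
    except smtplib.SMTPException as detail:
        print('SMTP exception: {}'.format(detail))
        return False
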
    result = insertDataWrapper(path)
    pattern = r'Process-(\d+),'
    jobString = str(multiprocessing.current_process())
    match = re.search(pattern, jobString)
    assert match is not None, "Process ID was not matched."
    returnDict[match.group(1)] = result


if __name__ == '__main__':

    processCommandLineArguments()

    inserter = Inserter()

    if COMMAND_LINE_ARGS.testing:
        logger.log("Testing mode is ON.\n", 'info')
    if COMMAND_LINE_ARGS.email:
        logger.log("Email will be sent.\n", 'info')

    msg = ''  # Used for the notification message.
    msgBody = ''  # Used for the notification message.
    databaseName = ''

    if COMMAND_LINE_ARGS.testing:
        databaseName = configer.configOptionValue("Database", "testing_db_name")
    else:
        databaseName = configer.configOptionValue("Database", "db_name")

    msg = "Recursively inserting data to the database named %s." % databaseName
    print msg
    msgBody += msg + "\n"
    """

    global COMMAND_LINE_ARGS
    parser = argparse.ArgumentParser(description = '')
    parser.add_argument('--dbname', help = 'Database file to be uploaded.')
    parser.add_argument('--fullpath',
                        help = 'Full path to database file to be uploaded.')
    parser.add_argument('--testing', action = 'store_true', default = False)

    COMMAND_LINE_ARGS = parser.parse_args()


if __name__ == '__main__':
    logger = SEKLogger(__name__, 'INFO')

    logger.log("Exporting DBs to cloud.")
    processCommandLineArguments()

    exporter = MSGDBExporter()
    notifier = MSGNotifier()
    exporter.logger.shouldRecord = True

    startTime = time.time()
    dbs = exporter.configer.configOptionValue('Export', 'dbs_to_export').split(
        ',')
    fileIDs = exporter.exportDBs(databases = dbs, toCloud = True,
                                 testing = COMMAND_LINE_ARGS.testing,
                                 deleteOutdated = True)

    wallTime = time.time() - startTime
    wallTimeMin = int(wallTime / 60.0)
                logger.log('len procs {}'.format(len(procs)), CRITICAL)

                # cleanup processes
                for i in range(len(procs)):
                    q.put('STOP')
                q.close()
                q.join()

                for p in procs:
                    p.join()

            except Exception as detail:
                logger.log("Exception {}".format(detail), ERROR)

        multiProcess(paths)
        logger.log('final row count {}'.format(counter.rowsValue()))
        assert counter.pathsValue() == TOTAL_PATHS

    else:
        # Single core:
        for p in paths:
            loader = SingleFileLoader(p)
            name = loader.meterName()

            if name in RESULT_CNTS:
                tupleResult = loader.insertDataFromFile()
                RESULT_CNTS[name] += tupleResult[0]
                EXCEPTION_CNTS[name] += tupleResult[1]
            else:
                (RESULT_CNTS[name],
                 EXCEPTION_CNTS[name]) = loader.insertDataFromFile()
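

# Hedged sketch, standalone and with hypothetical names: the worker/queue
# pattern that the parallel branch above and the orphaned fragment near the
# top of this example appear to use. Workers pull paths from a queue until a
# 'STOP' sentinel arrives and record a per-process count in a shared
# dictionary; countForPath stands in for SingleFileLoader's real work.
import multiprocessing


def countForPath(path):
    # Placeholder for SingleFileLoader(path).insertDataFromFile().
    return len(path)


def worker(queue, returnDict):
    total = 0
    for path in iter(queue.get, 'STOP'):
        total += countForPath(path)
    returnDict[multiprocessing.current_process().name] = total


if __name__ == '__main__':
    manager = multiprocessing.Manager()
    returnDict = manager.dict()
    queue = multiprocessing.Queue()
    procs = [multiprocessing.Process(target = worker,
                                     args = (queue, returnDict))
             for _ in range(2)]
    for p in procs:
        p.start()
    for path in ['a.xml.gz', 'bb.xml.gz', 'ccc.xml.gz']:
        queue.put(path)
    for _ in procs:
        queue.put('STOP')  # One sentinel per worker.
    for p in procs:
        p.join()
    print(dict(returnDict))
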
Esempio n. 19
class MECOXMLParser(object):
    """
    Parses XML for MECO data.
    """

    tableName = ''

    def __init__(self, testing = False):
        """
        Constructor.

        :param testing: (optional) Boolean indicating if Testing Mode is on.
        """

        self.logger = SEKLogger(__name__, 'silent')

        if (testing):
            self.logger.log("Testing Mode is ON.", 'info')

        self.debug = False
        self.configer = MSGConfiger()
        if self.configer.configOptionValue("Debugging", 'debug') == True:
            self.debug = True

        self.util = MSGDBUtil()
        self.mapper = MECOMapper()
        self.connector = MSGDBConnector(testing)
        self.conn = self.connector.connectDB()
        self.filename = None
        self.fileObject = None
        self.processForInsertElementCount = 0
        self.inserter = MECODBInserter()
        self.insertDataIntoDatabase = False

        # Count number of times sections in source data are encountered.
        self.tableNameCount = {'SSNExportDocument': 0, 'MeterData': 0,
                               'RegisterData': 0, 'RegisterRead': 0, 'Tier': 0,
                               'Register': 0, 'IntervalReadData': 0,
                               'Interval': 0, 'Reading': 0, 'IntervalStatus': 0,
                               'ChannelStatus': 0, 'EventData': 0, 'Event': 0}

        # Use this dictionary to track which channels were processed when
        # readings are being processed. this is to prevent duplicate channel
        # data from being inserted.
        self.channelProcessed = {}

        self.initChannelProcessed()

        # Tables to be inserted to.
        self.insertTables = self.configer.insertTables

        self.lastSeqVal = None
        self.fKeyVal = None
        self.lastTable = None
        self.fkDeterminer = MECOFKDeterminer()
        self.dupeChecker = MECODupeChecker()
        self.currentMeterName = None
        self.currentIntervalEndTime = None
        self.currentRegisterReadReadTime = None
        self.dupesExist = False
        self.channelDupeExists = False # For Reading dupes.
        self.numberDupeExists = False # For Register dupes.
        self.eventTimeDupeExists = False # For Event dupes.
        self.commitCount = 0
        self.readingDupeOnInsertCount = 0 # For Reading dupes.
        self.registerDupeOnInsertCount = 0 # For Register dupes.
        self.eventDupeOnInsertCount = 0 # For Event dupes.
        self.dataProcessCount = 0
        self.readingDupeCheckCount = 0 # For Reading dupes.
        self.registerDupeCheckCount = 0 # For Register dupes.
        self.eventDupeCheckCount = 0 # For Event dupes.
        self.insertCount = 0
        self.cumulativeInsertCount = 0
        self.nonProcessForInsertElementCount = 0
        self.readingInsertCount = 0
        self.registerInsertCount = 0
        self.eventInsertCount = 0
        self.totalReadingInsertCount = 0
        self.totalRegisterInsertCount = 0
        self.totalEventInsertCount = 0
        self.totalReadingDupeOnInsertCount = 0
        self.totalRegisterDupeOnInsertCount = 0
        self.totalEventDupeOnInsertCount = 0


    def parseXML(self, fileObject, insert = False, jobID = ''):
        """
        Parse an XML file.

        :param fileObject: a file object referencing an XML file.
        :param insert: (optional) True to insert to the database | False to
        perform no
        inserts.
        :returns: String containing a concise log of parsing.
        """

        print "parseXML:"

        self.commitCount = 0
        self.insertDataIntoDatabase = insert

        parseMsg = "\nParsing XML in %s.\n" % self.filename
        sys.stderr.write(parseMsg)
        parseLog = parseMsg

        tree = ET.parse(fileObject)
        root = tree.getroot()

        parseLog += self.walkTheTreeFromRoot(root, jobID = jobID)

        return parseLog


    def tableNameForAnElement(self, element):
        """
        Get the tablename for an element.

        :param element: Element tree element.
        :returns: table name
        """

        try:
            # Strip the XML namespace, e.g. '{ns}MeterData' -> 'MeterData'.
            name = re.search(r'\{.*\}(.*)', element.tag).group(1)
        except (AttributeError, TypeError):
            name = None
        return name


    def processDataToBeInserted(self, columnsAndValues, currentTableName,
                                fKeyValue, parseLog, pkeyCol, jobID = ''):
        """
        This is the method that performs insertion of parsed data to the
        database. Duplicate checks are performed on the endpoints of the data
         branches.

        :param columnsAndValues: A dictionary containing columns and their
        values.
        :param currentTableName: The name of the current table.
        :param fKeyValue: The value of the foreign key.
        :param parseLog: String containing a concise log of operations.
        :param pkeyCol: Column name for the primary key.
        :param jobID: Identifier for multiprocessing process.
        :returns: A string containing the parse log.
        """

        self.dataProcessCount += 1

        # Handle a special case for duplicate reading data.
        # Intercept the duplicate reading data before insert.
        if currentTableName == "Reading":
            self.channelDupeExists = self.dupeChecker.readingBranchDupeExists(
                self.conn, self.currentMeterName, self.currentIntervalEndTime,
                columnsAndValues['Channel'])
            self.readingDupeCheckCount += 1

        if currentTableName == "Register":
            self.numberDupeExists = self.dupeChecker.registerBranchDupeExists(
                self.conn, self.currentMeterName,
                self.currentRegisterReadReadTime, columnsAndValues['Number'])
            self.registerDupeCheckCount += 1

        if currentTableName == "Event":
            self.eventTimeDupeExists = self.dupeChecker.eventBranchDupeExists(
                self.conn, self.currentMeterName, columnsAndValues['EventTime'])
            self.eventDupeCheckCount += 1

        # Only perform an insert if there are no duplicate values
        # for the channel.
        if not self.channelDupeExists and not self.numberDupeExists and not \
            self.eventTimeDupeExists:

            # ***********************
            # ***** INSERT DATA *****
            # ***********************
            cur = self.inserter.insertData(self.conn, currentTableName,
                                           columnsAndValues,
                                           fKeyVal = fKeyValue,
                                           withoutCommit = 1)
            # The last 1 indicates don't commit. Commits are handled externally.
            self.insertCount += 1
            self.cumulativeInsertCount += 1

            # Only attempt getting the last sequence value if an insertion
            # took place.
            self.lastSeqVal = self.util.getLastSequenceID(self.conn,
                                                          currentTableName,
                                                          pkeyCol)
            # Store the primary key.
            self.fkDeterminer.pkValforCol[pkeyCol] = self.lastSeqVal

            if currentTableName == "Reading":
                self.readingInsertCount += 1
                self.totalReadingInsertCount += 1
            elif currentTableName == "Register":
                self.registerInsertCount += 1
                self.totalRegisterInsertCount += 1
            elif currentTableName == "Event":
                self.eventInsertCount += 1
                self.totalEventInsertCount += 1

        else: # Don't insert into Reading or Register table if a dupe exists.

            if (self.channelDupeExists):
                self.readingDupeOnInsertCount += 1
                self.totalReadingDupeOnInsertCount += 1
                # Log a marker only for the first duplicate in the group.
                if self.readingDupeOnInsertCount == 1:
                    parseLog += self.logger.logAndWrite(
                        "%s:{rd-dupe==>}" % jobID)

                # Also, verify the data is equivalent to the existing record.
                matchingValues = self.dupeChecker.readingValuesAreInTheDatabase(
                    self.conn, columnsAndValues)
                assert matchingValues == True, "Duplicate check found " \
                                               "non-matching values for meter" \
                                               " %s," \
                                               " endtime %s, channel %s (%s, " \
                                               "%s)." % (
                                                   self.currentMeterName,
                                                   self.currentIntervalEndTime,
                                                   columnsAndValues['Channel'],
                                                   columnsAndValues['RawValue'],
                                                   columnsAndValues['Value'])

                self.channelDupeExists = False

            elif (self.numberDupeExists):
                self.registerDupeOnInsertCount += 1
                self.totalRegisterDupeOnInsertCount += 1
                # Log a marker only for the first duplicate in the group.
                if self.registerDupeOnInsertCount == 1:
                    parseLog += self.logger.logAndWrite(
                        "%s:{re-dupe==>}" % jobID)

                self.numberDupeExists = False

            elif (self.eventTimeDupeExists):
                self.eventDupeOnInsertCount += 1
                self.totalEventDupeOnInsertCount += 1
                # Log a marker only for the first duplicate in the group.
                if self.eventDupeOnInsertCount == 1:
                    parseLog += self.logger.logAndWrite(
                        "%s:{ev-dupe==>}" % jobID)

                self.eventTimeDupeExists = False

            else:
                assert False, "Duplicate condition does not exist."

            self.logger.log('Record not inserted for %s.' % columnsAndValues,
                            'silent')

        return parseLog

    def generateConciseLogEntries(self, jobID = '', reportType = None):
        """
        Create log entries in the concise log.

        :param jobID: Identifier used to distinguish multiprocessing jobs.
        :returns: A concatenated string of log entries.
        """

        # @todo Change report type to enum type.

        log = ''
        if reportType == 'FINAL':
            self.logger.log('Final report', 'info')

            if (self.readingDupeOnInsertCount > 0 or
                    self.registerDupeOnInsertCount > 0 or
                    self.eventDupeOnInsertCount > 0):
                log = self.logger.logAndWrite("%s:{%srd,%sre,%sev}" % (
                    jobID, self.totalReadingDupeOnInsertCount,
                    self.totalRegisterDupeOnInsertCount,
                    self.totalEventDupeOnInsertCount))
            else:
                log = ''
            log += self.logger.logAndWrite("(%s)" % self.commitCount)
            log += self.logger.logAndWrite(
                "[%s]" % self.processForInsertElementCount)
            log += self.logger.logAndWrite("<%srd,%sre,%sev,%s>" % (
                self.totalReadingInsertCount, self.totalRegisterInsertCount,
                self.totalEventInsertCount, self.cumulativeInsertCount))

        elif reportType == 'INTERMEDIARY':

            if (self.readingDupeOnInsertCount > 0 or
                    self.registerDupeOnInsertCount > 0 or
                    self.eventDupeOnInsertCount > 0):
                log = self.logger.logAndWrite("%s:{%srd,%sre,%sev}" % (
                    jobID, self.readingDupeOnInsertCount,
                    self.registerDupeOnInsertCount,
                    self.eventDupeOnInsertCount))
            else:
                log = ''
            log += self.logger.logAndWrite("(%s)" % self.commitCount)
            log += self.logger.logAndWrite(
                "[%s]" % self.processForInsertElementCount)
            log += self.logger.logAndWrite("<%srd,%sre,%sev,%s,%s>" % (
                self.readingInsertCount, self.registerInsertCount,
                self.eventInsertCount, self.insertCount,
                self.cumulativeInsertCount))
        return log

    def resetGroupCounters(self):
        """
        Reset counters that are keeping track of groups.
        """

        self.readingDupeOnInsertCount = 0
        self.insertCount = 0
        self.readingInsertCount = 0
        self.registerDupeOnInsertCount = 0
        self.registerInsertCount = 0
        self.eventInsertCount = 0
        self.eventDupeOnInsertCount = 0

    def performTableBasedOperations(self, columnsAndValues, currentTableName,
                                    element):
        """
        Perform operations that are based on the current table.

        :param columnsAndValues
        :param currentTableName
        :param element
        """

        if currentTableName == "MeterData":
            self.currentMeterName = columnsAndValues['MeterName']

        elif currentTableName == "Interval":
            self.currentIntervalEndTime = columnsAndValues['EndTime']

        elif currentTableName == "RegisterRead":
            self.currentRegisterReadReadTime = columnsAndValues['ReadTime']

        elif currentTableName == "Event":
            columnsAndValues['Event_Content'] = element.text


    def walkTheTreeFromRoot(self, root, jobID = ''):
        """
        Walk an XML tree from its root node.

        :param root: The root node of an XML tree.
        :param jobID: Identifier used to distinguish multiprocessing jobs.
        :returns: String containing a concise log of parsing activity.
        """

        parseLog = ''
        walker = root.iter()

        for element, nextElement in self.getNext(walker):
            # Process every element in the tree while reading ahead to get
            # the next element.

            currentTableName = self.tableNameForAnElement(element)
            nextTableName = self.tableNameForAnElement(nextElement)
            assert currentTableName is not None, "Current table does not exist."

            # Maintain a count of tables encountered.
            self.tableNameCount[currentTableName] += 1

            columnsAndValues = {}
            it = iter(sorted(element.attrib.iteritems()))

            for item in list(it):
                # Create a dictionary of column names and values.
                columnsAndValues[item[0]] = item[1]

            if currentTableName in self.insertTables:
                # Check if the current table is one of the tables to have data
                # inserted.

                self.processForInsertElementCount += 1

                if self.debug:
                    self.logger.log("Processing table %s, next is %s." % (
                        currentTableName, nextTableName), 'debug')

                # Get the column name for the primary key.
                pkeyCol = self.mapper.dbColumnsForTable(currentTableName)[
                    '_pkey']

                fkeyCol = None
                fKeyValue = None

                try:
                    # Get the column name for the foreign key.
                    fkeyCol = self.mapper.dbColumnsForTable(currentTableName)[
                        '_fkey']
                except:
                    pass

                if self.debug:
                    self.logger.log("foreign key col (fkey) = %s" % fkeyCol,
                                    'debug')
                    self.logger.log("primary key col (pkey) = %s" % pkeyCol,
                                    'debug')
                    self.logger.log(columnsAndValues, 'debug')

                if fkeyCol is not None:
                    # Get the foreign key value.
                    fKeyValue = self.fkDeterminer.pkValforCol[fkeyCol]

                if self.debug:
                    self.logger.log("fKeyValue = %s" % fKeyValue, 'debug')

                self.performTableBasedOperations(columnsAndValues,
                                                 currentTableName, element)

                if self.insertDataIntoDatabase:
                    # Data is intended to be inserted into the database.
                    parseLog = self.processDataToBeInserted(columnsAndValues,
                                                            currentTableName,
                                                            fKeyValue, parseLog,
                                                            pkeyCol,
                                                            jobID = jobID)

                if self.debug:
                    self.logger.log("lastSeqVal = ", self.lastSeqVal)

                if self.lastReading(currentTableName, nextTableName):
                    # The last reading set has been reached.

                    if self.debug:
                        self.logger.log("----- last reading found -----",
                                        'debug')

                    parseLog += self.generateConciseLogEntries(jobID = jobID,
                                                               reportType =
                                                               'INTERMEDIARY')
                    self.resetGroupCounters()

                    parseLog += self.logger.logAndWrite("*")
                    self.commitCount += 1
                    self.conn.commit()

                if self.lastRegister(currentTableName, nextTableName):
                    # The last register set has been reached.

                    if self.debug:
                        self.logger.log("----- last register found -----",
                                        'debug')


        # Initial commit.
        if self.commitCount == 0:
            parseLog += self.generateConciseLogEntries(jobID = jobID,
                                                       reportType =
                                                       'INTERMEDIARY')
        self.resetGroupCounters()

        # Final commit.
        parseLog += self.logger.logAndWrite("---")
        parseLog += self.generateConciseLogEntries(jobID = jobID,
                                                   reportType = 'FINAL')
        self.resetGroupCounters()

        parseLog += self.logger.logAndWrite("*")
        self.commitCount += 1
        self.conn.commit()
        sys.stderr.write("\n")

        self.logger.log("Data process count = %s." % self.dataProcessCount,
                        'info')
        self.logger.log(
            "Reading dupe check count = %s." % self.readingDupeCheckCount,
            'info')
        return parseLog


    def lastReading(self, currentTable, nextTable):
        """
        Determine if the last reading is being visited.

        :param currentTable: current table being processed.
        :param nextTable: next table to be processed.
        :returns: True if last object in Reading table was read,
        otherwise return False.
        """

        if currentTable == "Reading" and (
                    nextTable == "MeterData" or nextTable == None):
            return True
        return False


    def lastRegister(self, currentTable, nextTable):
        """
        Determine if the last register is being visited.

        :param currentTable: current table being processed.
        :param nextTable: next table to be processed.
        :returns: True if last object in Register table was read,
        otherwise return False.
        """

        if currentTable == "Register" and (
                    nextTable == "MeterData" or nextTable == None):
            return True
        return False


    def getNext(self, somethingIterable, window = 1):
        """
        Return the current item and next item in an iterable data structure.

        :param somethingIterable: Something that has an iterator.
        :param window: How far to look ahead in the collection.
        :returns: The current iterable value and the next iterable value.
        """

        items, nexts = tee(somethingIterable, 2)
        nexts = islice(nexts, window, None)
        return izip_longest(items, nexts)


    def initChannelProcessed(self):
        """
        Initialize the dictionary for channel processing.
        """

        self.channelProcessed = {'1': False, '2': False, '3': False, '4': False}


    def getLastElement(self, rows):
        """
        Get the last element in a collection.

        Example:
            rows = (element1, element2, element3)
            getLastElement(rows) # return element3

        :param rows: Result rows from a query.
        :returns: The last element in the collection.
        """

        for i, var in enumerate(rows):
            if i == len(rows) - 1:
                return var
class MSGFileUtil(object):
    """
    Utilities related to files and directories.
    """

    def __init__(self):
        """
        Constructor.
        """
        warnings.simplefilter('default')
        warnings.warn("This module is deprecated in favor of SEKFileUtil.",
                      DeprecationWarning)
        self.logger = SEKLogger(__name__, 'DEBUG')


    def validDirectory(self, path):
        """
        Verify that the path is a valid directory.

        :param path: Path to check.
        :returns: True if path is a valid directory.
        """

        if os.path.exists(path) and os.path.isdir(path):
            return True
        else:
            return False


    def md5Checksum(self, fullPath):
        """
        Get the MD5 checksum for the file given by fullPath.

        :param fullPath: Full path of the file for which to generate a
        checksum.
        :returns: MD5 checksum value as a hex digest.
        """

        try:
            f = open(fullPath, mode = 'rb')
            content = hashlib.md5()
            for buf in iter(partial(f.read, 128), b''):
                content.update(buf)
            md5sum = content.hexdigest()
            f.close()
            return md5sum
        except IOError as detail:
            self.logger.log(
                'Exception during checksum calculation: %s' % detail, 'ERROR')


    def gzipUncompressFile(self, srcPath, destPath):
        """
        Gzip uncompress a file given by fullPath.

        @todo Need to deal with large file sizes. Stop reading into memory.

        :param srcPath: Full path of the file to be uncompressed.
        :param destPath: Full path of file to be written to.
        """

        self.logger.log(
            'Uncompressing gzip source %s to %s' % (srcPath, destPath), 'DEBUG')
        gzipFile = gzip.open(srcPath, "rb")
        uncompressedFile = open(destPath, "wb")
        decoded = gzipFile.read()
        try:
            uncompressedFile.write(decoded)
        except Exception as detail:
            self.logger.log(
                "Exception while writing uncompressed file: %s" % detail,
                'ERROR')
        gzipFile.close()
        uncompressedFile.close()


    def gzipCompressFile(self, fullPath):
        """
        Perform gzip compression on a file at fullPath.

        @todo Generalize this method.

        :param fullPath: Full path of the file to be compressed.
        :returns: Boolean: True if successful, False otherwise.
        """

        success = False
        self.logger.log('Gzip compressing %s.' % fullPath)
        try:
            f_in = open('%s' % (fullPath), 'rb')
            f_out = gzip.open('%s.gz' % (fullPath), 'wb')
            f_out.writelines(f_in)
            f_out.close()
            f_in.close()
            success = True
        except IOError as detail:
            self.logger.log('IOError exception while gzipping: %s' % detail, 'ERROR')
        return success


    def splitFile(self, fullPath = '', chunkSize = 0):
        """
        @DEPRECATED

        Split a file into chunks. Write output files to base path of the
        input file.

        Adapted from https://gist.github.com/mattiasostmar/7883550.

        :param fullPath:
        :param chunkSize:
        :returns: A list of file chunks in full path form.
        """

        fChunks = []
        basePath = os.path.dirname(fullPath)
        baseName = os.path.basename(fullPath)
        self.logger.log('basename: %s' % baseName)

        f = open(fullPath, 'rb')
        data = f.read()
        f.close()

        bytes = len(data)

        # Ensure splitting doesn't happen if it's not needed.
        if bytes <= chunkSize:
            return [fullPath]

        chunkNames = []

        fCnt = 0
        for i in range(0, bytes + 1, chunkSize):
            fn1 = "%s/%s.%s" % (basePath, baseName, fCnt)
            self.logger.log("Splitter writing to %s" % fn1, 'INFO')
            fChunks.append(fn1)
            chunkNames.append(fn1)

            try:
                f = open(fn1, 'wb')
                f.write(data[i:i + chunkSize])
                f.close()
            except Exception as detail:
                print "Exception during writing split file: %s" % detail

            fCnt += 1

        return fChunks


    def splitLargeFile(self, fullPath = '', numChunks = 0, chunkSize = 0):
        """
        Split a large file into chunks.

        :param fullPath: String
        :param numChunks: Int number of files to be split into.
        :param chunkSize: @DEPRECATED
        :return: List of file chunk names in full path form.
        """

        fChunks = []
        basePath = os.path.dirname(fullPath)
        baseName = os.path.basename(fullPath)
        self.logger.log('basename: %s' % baseName)

        # Guard against a zero or single chunk count before dividing by it.
        if numChunks == 0 or numChunks == 1:
            return [fullPath]

        fp = open(fullPath, 'rb')
        fsize = os.path.getsize(fullPath)
        chunkSize = int(float(fsize) / float(numChunks))
        totalBytes = 0

        self.logger.log('chunk size: %s' % chunkSize)

        for x in range(numChunks):

            if x == numChunks - 1:
                chunkSize = fsize - totalBytes

            data = fp.read(chunkSize)
            totalBytes += len(data)
            fout = open("%s/%s.%s" % (basePath, baseName, x), "wb")
            self.logger.log('Writing %s/%s.%s' % (basePath, baseName, x),
                            'debug')
            fChunks.append("%s/%s.%s" % (basePath, baseName, x))

            fout.write(data)
            fout.close()

        fp.close()
        return fChunks
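    # Worked example (illustrative): for a hypothetical 1,000-byte file and
    # numChunks = 3, chunkSize = int(1000 / 3.0) = 333, so chunks of 333, 333
    # and 334 bytes are written; the final chunk absorbs the remainder because
    # chunkSize is recalculated as fsize - totalBytes on the last pass.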


    def fileSize(self, fullPath = ''):
        """
        Get the size in bytes for the file at fullPath.

        :param fullPath:
        :return: size in bytes
        """

        return os.path.getsize(fullPath)
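
# Standalone sketch (not part of MSGFileUtil): the 128-byte chunked MD5
# pattern used by md5Checksum() above yields the same digest as hashing the
# whole payload in a single call; an in-memory stream stands in for a file.
import hashlib
from StringIO import StringIO
from functools import partial

payload = 'example payload ' * 100    # arbitrary sample data
stream = StringIO(payload)
digest = hashlib.md5()
for buf in iter(partial(stream.read, 128), ''):
    digest.update(buf)
assert digest.hexdigest() == hashlib.md5(payload).hexdigest()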
Example 21
class SEKLoggerTester(unittest.TestCase):
    def setUp(self):
        self.logger = SEKLogger(__name__, level = DEBUG)
        print 'logger level: %s' % self.logger.loggerLevel

    def testInit(self):
        self.logger.log('Testing init.',level = INFO)

        self.assertIsNotNone(self.logger)

    def testLogRecording(self):
        self.logger.log('Testing log recording.', INFO)

        msg = "Recording test."

        self.logger.startRecording()
        self.logger.log(msg, 'info')
        self.logger.endRecording()
        self.logger.log("This should not be logged.", INFO)

        result = re.search(msg, self.logger.recording).group(0)

        self.logger.log("recording result: %s" % self.logger.recording)

        self.assertEqual(result, msg)

    def testSilentLogging(self):
        # Disabled: the early return below skips the body of this test.
        return

        self.logger.log('Testing silent logging.', INFO)

        msg = "Recording test."

        self.logger.startRecording()
        self.logger.log(msg, SILENT)
        self.logger.endRecording()

        self.assertEqual(self.logger.recording, '')

    def testDebugLogging(self):
        self.logger.log('Testing debug logging', DEBUG)

    def testDoublingOfLoggingOutput(self):
        self.logger.log('This is a test of doubling of logger output at the beginning of a test.')
class MSGNOAAWeatherDataInserter(object):
    """
    Performs weather data insertion to a database.
    """
    def __init__(self, testing=False):
        """
        Constructor.
        :param testing: True if testing mode is being used.
        """

        self.logger = SEKLogger(__name__, 'info')
        self.dbUtil = MSGDBUtil()
        self.dupeChecker = MSGWeatherDataDupeChecker()

    def insertDataDict(self, conn, tableName, listOfDataDicts, commit=False):
        """
        Given a table name and a dictionary of column names and values,
        insert them to the db.

        :param conn: A database connection.
        :param tableName: Name of the DB table to be inserted to.
        :param listOfDataDicts: List of dictionaries of column names and
        values to be inserted to the DB.
        :param (optional) commit: A flag indicating that DB transactions will
        be committed.
        :returns: Set of datetimes processed.
        """

        cur = conn.cursor()
        processedDateTimes = set()

        for row in listOfDataDicts:

            # Add a creation timestamp using the SQL function.
            row['created'] = 'NOW()'

            cols = []
            vals = []

            for col in row.keys():
                # Prepare the columns and values for insertion via SQL.

                cols.append(col)
                if (row[col] != 'NULL'):
                    # Surround each value with single quotes...
                    vals.append("'%s'" % row[col])
                else:
                    # Except for NULL values.
                    vals.append("%s" % row[col])

            sql = """INSERT INTO "%s" (%s) VALUES (%s)""" % (
                tableName, ','.join(cols), ','.join(vals))

            if self.dupeChecker.duplicateExists(cur, row['wban'],
                                                row['datetime'],
                                                row['record_type']):
                self.logger.log("Dupe found, dropping dupe.", 'info')
            else:
                processedDateTimes.add(
                    dt.datetime.strptime(row['datetime'], "%Y-%m-%d %H:%M"))
                if self.dbUtil.executeSQL(cur, sql, exitOnFail=False) is False:
                    # An error occurred.
                    for col in sorted(row.keys()):
                        print "%s: %s" % (col, row[col])
                    sys.exit(-1)

        if commit:
            try:
                conn.commit()
            except:
                self.logger.log("ERROR: Commit failed.", 'error')

        return processedDateTimes
Example 23
class MSGConfiger(object):
    """
    Supports system-specific configuration for MECO data processing.
    The site-level configuration file is located in ~/.msg-data-operations.cfg.

    Usage:

    configer = MSGConfiger()

    """
    def __init__(self):
        """
        Constructor.
        """

        self._config = ConfigParser.ConfigParser()
        self.logger = SEKLogger(__name__, 'INFO')
        self.fileUtil = SEKFileUtil()

        # Define tables that will have data inserted. Data will only be inserted
        # to tables that are defined here.
        self.insertTables = ('MeterData', 'RegisterData', 'RegisterRead',
                             'Tier', 'Register', 'IntervalReadData',
                             'Interval', 'Reading', 'EventData', 'Event')

        # Check permissions on the config file. Refuse to run if the permissions
        # are not set appropriately.

        configFilePath = '~/.msg-data-operations.cfg'

        if self.fileUtil.isMoreThanOwnerReadableAndWritable(
                os.path.expanduser(configFilePath)):
            self.logger.log(
                "Configuration file permissions are too permissive. Operation "
                "will not continue.", 'error')
            sys.exit()

        try:
            self._config.read(['site.cfg', os.path.expanduser(configFilePath)])
        except:
            self.logger.log(
                "Critical error: The data in {} cannot be "
                "accessed successfully.".format(configFilePath), 'ERROR')
            sys.exit(-1)

    def configOptionValue(self, section, option):
        """
        Get a configuration value from the local configuration file.
        :param section: String of section in config file.
        :param option: String of option in config file.
        :returns: The value contained in the configuration file.
        """

        try:
            configValue = self._config.get(section, option)
            if configValue == "True":
                return True
            elif configValue == "False":
                return False
            else:
                return configValue
        except:
            self.logger.log(
                "Failed when getting configuration option {} in section {"
                "}.".format(option, section), 'error')
            sys.exit(-1)
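
# Standalone sketch (not part of MSGConfiger): configOptionValue() returns the
# raw option string except for the literal strings "True"/"False", which it
# maps to Python booleans. The section and option names below are
# hypothetical; real settings live in ~/.msg-data-operations.cfg.
import ConfigParser
from StringIO import StringIO

sampleConfig = "[Debugging]\ndebug = False\n"
parser = ConfigParser.ConfigParser()
parser.readfp(StringIO(sampleConfig))
rawValue = parser.get('Debugging', 'debug')
assert rawValue == "False"  # ConfigParser returns the string; configOptionValue()
                            # would convert it to the boolean False.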
class MSGEgaugeNewDataChecker(object):
    """
    Provide notification of newly loaded MSG eGauge data.

    This uses notification type MSG_EGAUGE_SERVICE.
    """

    def __init__(self):
        """
        Constructor.
        """

        print __name__
        self.logger = SEKLogger(__name__)
        self.connector = MSGDBConnector()
        self.dbUtil = MSGDBUtil()
        self.notifier = MSGNotifier()
        self.configer = MSGConfiger()


    def newDataCount(self):
        """
        Measure the amount of new data that is present since the last time
        new data was reported.
        """

        cursor = self.connector.conn.cursor()
        tableName = 'EgaugeEnergyAutoload'
        lastTime = self.lastReportDate('MSG_EGAUGE_SERVICE')
        if lastTime is None:
            lastTime = '1900-01-01'
        sql = """SELECT COUNT(*) FROM "%s" WHERE datetime > '%s'""" % (
            tableName, lastTime)

        success = self.dbUtil.executeSQL(cursor, sql)
        if success:
            rows = cursor.fetchall()

            if not rows[0][0]:
                return 0
            else:
                return rows[0][0]
        else:
            # @todo Raise an exception.
            return None


    def lastReportDate(self, notificationType):
        """
        Get the last time a notification was reported.

        :param notificationType: A string indicating the type of the
        notification. It is stored in the event history.
        :returns: datetime of last report date.
        """

        cursor = self.connector.conn.cursor()
        sql = """SELECT MAX("notificationTime") FROM "%s" WHERE
        "notificationType" = '%s'""" % (
            NOTIFICATION_HISTORY_TABLE, notificationType)

        success = self.dbUtil.executeSQL(cursor, sql)
        if success:
            rows = cursor.fetchall()

            if not rows[0][0]:
                return None
            else:
                return rows[0][0]
        else:
            # @todo Raise an exception.
            return None


    def saveNotificationTime(self):
        """
        Save the notification event to the notification history.
        """

        cursor = self.connector.conn.cursor()
        sql = """INSERT INTO "%s" ("notificationType", "notificationTime")
        VALUES ('MSG_EGAUGE_SERVICE', NOW())""" % NOTIFICATION_HISTORY_TABLE
        success = self.dbUtil.executeSQL(cursor, sql)
        self.connector.conn.commit()
        if not success:
            # @todo Raise an exception.
            self.logger.log(
                'An error occurred while saving the notification time.')


    def sendNewDataNotification(self, testing = False):
        """
        Sending notification reporting on new data being available since the
        last time new data was reported.

        :param testing: Use testing mode when True.
        """

        lastReportDate = self.lastReportDate('MSG_EGAUGE_SERVICE')

        if not lastReportDate:
            lastReportDate = "never"

        msgBody = '\nNew MSG eGauge data has been loaded to %s.' % \
                  self.connector.dbName
        msgBody += '\n\n'
        msgBody += 'The new data count is %s readings.' % self.newDataCount()
        msgBody += '\n\n'
        msgBody += 'The last report date was %s.' % lastReportDate
        msgBody += '\n\n'
        self.notifier.sendNotificationEmail(msgBody, testing = testing)
        self.saveNotificationTime()
class MECODBInserter(object):
    """
    Provides methods that perform insertion of MECO data.
    """

    def __init__(self):
        """
        Constructor.
        """

        self.logger = SEKLogger(__name__, 'debug')
        self.mapper = MECOMapper()
        self.dupeChecker = MECODupeChecker()
        self.dbUtil = MSGDBUtil()

    def __call__(self, param):
        print "CallableClass.__call__(%s)" % param

    def insertData(self, conn, tableName, columnsAndValues, fKeyVal = None,
                   withoutCommit = 0):
        """
        Given a table name and a dictionary of column names and values,
        insert them to the DB.

        :param conn: database connection
        :param tableName: name of the db table
        :param columnsAndValues: dictionary of columns and values to be
        inserted to the db
        :param (optional) fKeyVal: an explicit foreign key value
        :param (optional) withoutCommit: a flag indicated that the insert
        will not be immediately committed
        :returns: A database cursor.
        """

        cur = conn.cursor()

        # Get a dictionary of mapped (from DB to source data) column names.
        columnDict = self.mapper.getDBColNameDict(tableName)

        dbColsAndVals = {}

        if VISUALIZE_DATA:
            print "----------" + tableName + "----------"
            print columnDict
            print columnsAndValues

        for col in columnDict.keys():

            # Use DEFAULT as the value for the primary key so that the
            # primary key is obtained from the predefined sequence.
            if col == '_pkey':
                if VISUALIZE_DATA:
                    print columnDict[col], # DB col name.
                    print 'DEFAULT'
                dbColsAndVals[columnDict[col]] = 'DEFAULT'

            # For the foreign key, set the value from the given parameter.
            elif col == '_fkey':
                if VISUALIZE_DATA:
                    print columnDict[col], # DB col name.
                    print fKeyVal
                dbColsAndVals[columnDict[col]] = fKeyVal

            else:
                if VISUALIZE_DATA:
                    print columnDict[col], # DB col name.

                # The Register and Reading tables need to handle NULL
                # values as a special case.
                if tableName == 'Register' or tableName == 'Reading':
                    try:
                        if VISUALIZE_DATA:
                            print columnsAndValues[col] # data source value
                        dbColsAndVals[columnDict[col]] = columnsAndValues[col]
                    except:
                        if VISUALIZE_DATA:
                            print 'NULL'
                        dbColsAndVals[columnDict[col]] = 'NULL'

                # For all other cases, simply pass the value.
                else:
                    if VISUALIZE_DATA:
                        print columnsAndValues[col] # data source value
                    dbColsAndVals[columnDict[col]] = columnsAndValues[col]

        # Add a creation timestamp to MeterData.
        if tableName == 'MeterData':
            dbColsAndVals['created'] = 'NOW()'

        cols = []
        vals = []
        for col in dbColsAndVals.keys():
            cols.append(col)

            # DEFAULT, NULL and NOW() need to appear without quotes.
            if dbColsAndVals[col] in {'DEFAULT', 'NULL', 'NOW()'}:
                vals.append(dbColsAndVals[col])
            else:
                vals.append("'%s'" % dbColsAndVals[
                    col]) # Surround value with single quotes.

        sql = """INSERT INTO "%s" (%s) VALUES (%s)""" % (
        tableName, ','.join(cols), ','.join(vals))

        self.dbUtil.executeSQL(cur, sql)

        if withoutCommit == 0:
            try:
                conn.commit()
            except:
                self.logger.log("ERROR: Commit failed.", 'error')

        return cur
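
# Standalone sketch (not part of MECODBInserter): mirrors the quoting rule in
# insertData() above, where DEFAULT, NULL and NOW() are emitted unquoted and
# all other values are wrapped in single quotes. The sample columns are
# hypothetical.
dbColsAndVals = {'meter_data_id': 'DEFAULT', 'meter_name': '100123',
                 'created': 'NOW()'}
cols = []
vals = []
for col in dbColsAndVals.keys():
    cols.append(col)
    if dbColsAndVals[col] in {'DEFAULT', 'NULL', 'NOW()'}:
        vals.append(dbColsAndVals[col])
    else:
        vals.append("'%s'" % dbColsAndVals[col])
sql = """INSERT INTO "%s" (%s) VALUES (%s)""" % (
    'MeterData', ','.join(cols), ','.join(vals))
print sql  # e.g. INSERT INTO "MeterData" (...) VALUES (DEFAULT,'100123',NOW())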
class MSGDataVerifier(object):
    """
    Perform verification procedures related to data integrity.
    """

    def __init__(self):
        """
        Constructor.
        """

        self.logger = SEKLogger(__name__, 'DEBUG')
        self.cursor = MSGDBConnector().connectDB().cursor()
        self.dbUtil = MSGDBUtil()

    def mecoReadingsDupeCount(self):
        """
        Generate counts of MECO dupe readings.
        """

        dupes = 0
        startDate = lambda y, m: '%d-%02d-%02d' % (y, m, 1)
        endDate = lambda y, m: '%d-%02d-%02d' % (
            y, m, calendar.monthrange(y, m)[1])
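        # Worked example (illustrative): startDate(2014, 2) == '2014-02-01'
        # and endDate(2014, 2) == '2014-02-28', since calendar.monthrange()
        # supplies the number of days in the month.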

        for y in YEARS:
            startDates = [startDate(y, m) for m in
                          map(lambda x: x + 1, range(12))]
            endDates = [endDate(y, m) for m in map(lambda x: x + 1, range(12))]

            for start in startDates:
                cnt = self.__mecoReadingsDupeCount(start, endDates[
                    startDates.index(start)])
                self.logger.log('start: %s, dupe cnt: %s' % (start, cnt),
                                'INFO')
                dupes += cnt

        return dupes


    def __mecoReadingsDupeCount(self, startDate, endDate):
        """

        :param startDate:
        :param endDate:
        :returns: DB row count.
        """

        self.dbUtil.executeSQL(self.cursor, """SELECT "Interval".end_time,
                            "MeterData".meter_name,
                            "Reading".channel
                     FROM "MeterData"
                     INNER JOIN "IntervalReadData" ON "MeterData"
                     .meter_data_id = "IntervalReadData".meter_data_id
                     INNER JOIN "Interval" ON "IntervalReadData"
                     .interval_read_data_id = "Interval".interval_read_data_id
                     INNER JOIN "Reading" ON "Interval".interval_id = "Reading"
                     .interval_id
                     WHERE "Interval".end_time BETWEEN '%s' and '%s'
                     GROUP BY "MeterData".meter_name,
                     "Interval".end_time,
                     "Reading".channel
                     HAVING (COUNT(*) > 1)""" % (startDate, endDate))
        return len(self.cursor.fetchall())


    def egaugeAggregationCount(self):
        """
        There should not be more than 96 15-min interval endpoints within a
        single calendar day for a given sub ID.
        :return:
        """
        pass
Example 27
class MSGDataAggregatorTester(unittest.TestCase):
    """
    Unit tests for MSG Data Aggregator.

    """

    def setUp(self):
        """
        Constructor.
        """
        self.logger = SEKLogger(__name__, 'DEBUG')
        self.aggregator = MSGDataAggregator()
        self.testStart = '2014-01-02 11:59'
        self.testEnd = '2014-01-02 12:14'
        self.rawTypes = ['weather', 'egauge', 'circuit', 'irradiance']

    def testIrradianceFetch(self):
        """
        Test raw data fetching over the testing time interval.
        :return:
        """

        timeCol = 'timestamp'
        rows = []
        for row in self.aggregator.rawData(dataType = 'irradiance',
                                           orderBy = [timeCol, 'sensor_id'],
                                           timestampCol = timeCol,
                                           startDate = self.testStart,
                                           endDate = self.testEnd):
            rows.append(row)
        self.assertIsNotNone(rows, 'Rows are present.')

    def testWeatherFetch(self):
        """
        Test raw data fetching over the testing time interval.
        :return:
        """

        timeCol = 'timestamp'
        rows = []
        for row in self.aggregator.rawData(dataType = 'weather',
                                           orderBy = [timeCol],
                                           timestampCol = timeCol,
                                           startDate = self.testStart,
                                           endDate = self.testEnd):
            rows.append(row)
        self.assertIsNotNone(rows, 'Rows are present.')

    def testCircuitFetch(self):
        """
        Test raw data fetching over the testing time interval.
        :return:
        """

        timeCol = 'timestamp'
        rows = []
        for row in self.aggregator.rawData(dataType = 'circuit',
                                           orderBy = [timeCol, 'circuit'],
                                           timestampCol = timeCol,
                                           startDate = self.testStart,
                                           endDate = self.testEnd):
            rows.append(row)
        self.assertIsNotNone(rows, 'Rows are present.')

    def testEgaugeFetch(self):
        """
        Test raw data fetching over the testing time interval.
        :return:
        """

        timeCol = 'datetime'
        rows = []
        for row in self.aggregator.rawData(dataType = 'egauge',
                                           orderBy = [timeCol, 'egauge_id'],
                                           timestampCol = timeCol,
                                           startDate = self.testStart,
                                           endDate = self.testEnd):
            rows.append(row)
        self.assertIsNotNone(rows, 'Rows are present.')


    def testEgaugeAggregation(self):
        """
        Perform aggregation over the testing time interval.
        :return:
        """

        self.logger.log('Testing Egauge aggregation.')
        rowCnt = 0
        agg = self.aggregator.aggregatedData(dataType = 'egauge',
                                             aggregationType = 'agg_egauge',
                                             timeColumnName = 'datetime',
                                             subkeyColumnName = 'egauge_id',
                                             startDate = self.testStart,
                                             endDate = self.testEnd)
        print [col for col in agg.columns]
        for row in agg.data:
            print row
            rowCnt += 1
        self.logger.log('row cnt %d' % rowCnt)
        self.logger.log('agg cols: %d' % len(agg.columns))
        self.assertEqual(rowCnt, 5, 'Row count not correct.')

        self.assertEqual(len(agg.columns), 37,
                         'Egauge columns not equal to 37.')
        self.aggregator.insertAggregatedData(agg = agg)


    def testCircuitAggregation(self):
        """
        Test aggregation over the testing time interval.
        :return:
        """

        self.logger.log('Testing circuit aggregation.')
        rowCnt = 0
        agg = self.aggregator.aggregatedData(dataType = 'circuit',
                                             aggregationType = 'agg_circuit',
                                             timeColumnName = 'timestamp',
                                             subkeyColumnName = 'circuit',
                                             startDate = self.testStart,
                                             endDate = self.testEnd)
        print [col for col in agg.columns]
        for row in agg.data:
            print row
            rowCnt += 1
        self.logger.log('row cnt %d' % rowCnt)
        self.logger.log('agg cols: %d' % len(agg.columns))
        self.assertEqual(rowCnt, 2, 'Row count not correct.')
        self.assertEqual(len(agg.columns), 8, 'Circuit columns not equal to 8.')
        self.aggregator.insertAggregatedData(agg = agg)


    def testIrradianceAggregation(self):
        """
        Test aggregation over the testing time interval.
        :return:
        """

        self.logger.log('Testing irradiance aggregation.')
        rowCnt = 0
        datatype = 'irradiance'
        agg = self.aggregator.aggregatedData(dataType = datatype,
                                             aggregationType = 'agg_irradiance',
                                             timeColumnName = 'timestamp',
                                             subkeyColumnName = 'sensor_id',
                                             startDate = self.testStart,
                                             endDate = self.testEnd)
        for row in agg.data:
            print '%d: %s' % (rowCnt, row)
            rowCnt += 1
        self.logger.log('row cnt %d' % rowCnt)
        self.logger.log('agg cols: %d' % len(agg.columns))
        self.assertEqual(rowCnt, 1, 'Row count not correct.')
        self.assertEqual(len(agg.columns), 3,
                         'Irradiance columns not equal to 3.')
        self.aggregator.insertAggregatedData(agg = agg)


    def testWeatherAggregation(self):
        """
        Test aggregation over the testing time interval.
        :return:
        """

        rowCnt = 0
        agg = self.aggregator.aggregatedData(dataType = 'weather',
                                             aggregationType = 'agg_weather',
                                             timeColumnName = 'timestamp',
                                             subkeyColumnName = None,
                                             startDate = self.testStart,
                                             endDate = self.testEnd)
        for row in agg.data:
            print '%d: %s' % (rowCnt, row)
            rowCnt += 1
        self.assertEqual(rowCnt, 1, 'Row count not correct.')

        self.logger.log('agg cols: %d' % len(agg.columns))
        self.assertEqual(len(agg.columns), 3, 'Weather columns not equal to 3.')
        self.aggregator.insertAggregatedData(agg = agg)


    def test_month_starts_and_ends(self):
        """
        Test retrieving the list of start and end dates for each month in a
        given aggregation time period.

        All starts except one should be at time 00:00:00.

        Starts and ends appear in alternating order but are joined in a tuple.
        """
        # @REVIEWED
        # @todo Optimize by combining start and end tests.

        # Counts are shared with the nested helpers via a dict because
        # Python 2 has no nonlocal statement.
        counts = {'starts': 0, 'ends': 0}

        def test_starts(timeColName, dataType):
            self.logger.log('testing {},{}'.format(timeColName, dataType))

            # Take every other value from the unzipped pairs.
            starts = [x for x in itertools.islice(
                zip(*self.aggregator.monthStartsAndEnds(timeColName, dataType)),
                0, None, 2)]
            counts['starts'] = len(starts)

            # Test on the flattened start values.
            self.assertLessEqual(len(filter(
                lambda x: x.time() != datetime.strptime('00:00:00',
                                                        '%H:%M:%S').time(),
                list(itertools.chain.from_iterable(starts)))), 1)

        def test_ends(timeColName, dataType):
            self.logger.log('testing {},{}'.format(timeColName, dataType))

            # Take every other value from the unzipped pairs.
            ends = [x for x in itertools.islice(
                zip(*self.aggregator.monthStartsAndEnds(timeColName, dataType)),
                1, None, 2)]
            counts['ends'] = len(ends)

            # Test on the flattened end values.
            self.assertLessEqual(len(filter(
                lambda x: x.time() != self.aggregator.incrementEndpoint(
                    datetime.strptime('23:59:59', '%H:%M:%S')).time(),
                list(itertools.chain.from_iterable(ends)))), 1)

        for myType in ['weather', 'egauge', 'circuit', 'irradiance']:
            if myType == 'egauge':
                test_starts('datetime', myType)
                test_ends('datetime', myType)
            else:
                test_starts('timestamp', myType)
                test_ends('timestamp', myType)
            self.assertEquals(counts['starts'], counts['ends'])


    def testAggregateAllData(self):

        # @todo Revise this test so that live data is not affected.

        return

        for myType in ['weather', 'egauge', 'circuit', 'irradiance']:
            self.aggregator.aggregateAllData(dataType = myType)

    def testExistingIntervals(self):
        self.logger.log('Testing existing intervals.')
        aggType = [('agg_weather', 'timestamp'), ('agg_egauge', 'datetime'),
                   ('agg_circuit', 'timestamp'),
                   ('agg_irradiance', 'timestamp')]
        self.assertEqual(
            len(map(lambda x: self.aggregator.existingIntervals(x[0], x[1])[0],
                    aggType)), len(aggType),
            'Mismatched existing aggregation intervals.')

    def testUnaggregatedIntervals1(self):
        # @todo provide static test data for this test.
        self.logger.log('testing unagged intervals')
        MINUTE_POSITION = 4
        INTERVAL_DURATION = 15

        weather = []
        for row in self.aggregator.unaggregatedEndpoints('weather',
                                                         'agg_weather',
                                                         'timestamp'):
            self.logger.log('row: {}'.format(row))


    def testUnaggregatedIntervals2(self):
        # @todo provide static test data for this test.
        self.logger.log('testing unagged intervals')
        MINUTE_POSITION = 4
        INTERVAL_DURATION = 15

        egauge = []
        for row in self.aggregator.unaggregatedEndpoints('egauge', 'agg_egauge',
                                                         'datetime',
                                                         'egauge_id'):
            self.logger.log('row: {}'.format(row))


    def testLastAggregationEndpoint(self):
        # Covered by testUnaggregatedIntervals.
        self.logger.log('Testing last agg endpoint')
        print self.aggregator.lastAggregationEndpoint(aggDataType = 'weather',
                                                      timeColumnName =
                                                      'timestamp')

    def testUnaggregatedDataExists(self):
        """

        :return:
        """
        # @todo provide static test data for this test.

        myArgs = [('weather', 'agg_weather', 'timestamp', ''),
                  ('egauge', 'agg_egauge', 'datetime', 'egauge_id'),
                  ('circuit', 'agg_circuit', 'timestamp', 'circuit'),
                  ('irradiance', 'agg_irradiance', 'timestamp', 'sensor_id')]
        self.logger.log(map(
            lambda x: self.aggregator.unaggregatedIntervalCount(
                dataType = x[0], aggDataType = x[1], timeColumnName = x[2],
                idColumnName = x[3]), myArgs))

    def testAggregatedVsNewData(self):
        """

        :return:
        """
        # @todo provide static test data for this test.

        result = self.aggregator.aggregatedVsNewData()

        self.logger.log('result {}'.format(result), 'info')
        self.assertEqual(len(self.aggregator.dataParams.keys()),
                         len(result.keys()),
                         'Result not obtained for each type.')

    def testAggregateNewData(self):
        """
        @IMPORTANT Should not be run on live data.
        :return:
        """

        # return
        map(self.aggregator.aggregateNewData, self.rawTypes)


    def testLastUnaggregatedAndAggregatedEndpoints(self):
        """
        :return:
        """
        # @todo Needs static test data.
        print self.aggregator.lastUnaggregatedAndAggregatedEndpoints(
            dataType = 'egauge')

    def test_endpoint_increment(self):
        myDT = datetime(2014, 02, 01, 23, 45)
        self.logger.log('dt = {}'.format(myDT))
        result = self.aggregator.incrementEndpoint(endpoint = myDT)
        self.logger.log('result {}'.format(result))
        self.assertEqual(result, datetime(2014, 02, 02, 00, 00, 00))
class MSGWeatherDataUtil(object):
    """
    Utility methods for working with weather data.
    """

    def __init__(self):
        """
        Constructor.

        A database connection is not maintained here to keep this class
        lightweight.
        """

        self.logger = SEKLogger(__name__, DEBUG)
        self.configer = MSGConfiger()
        self.url = self.configer.configOptionValue('Weather Data',
                                                   'weather_data_url')
        self.pattern = self.configer.configOptionValue('Weather Data',
                                                       'weather_data_pattern')
        self.fileList = []
        self.dateList = [] # List of dates corresponding to the weather data files.
        self.fillFileListAndDateList()
        self.dbUtil = MSGDBUtil()


    def fillFileListAndDateList(self):
        """
        Return a list of weather files obtained from the remote server used
        in processing weather data.
        """

        response = urllib2.urlopen(self.url).read()

        self.logger.log('Filling file list:', DEBUG)
        for filename in re.findall(self.pattern, response):
            # Only examine first match group in the filename match.
            self.logger.log('filename {}'.format(filename[0]), DEBUG)
            self.fileList.append(filename[0])
            self.dateList.append(self.datePart(filename[0]))


    def datePart(self, filename = None, datetime = None):
        """
        Return the date part of a NOAA weather data filename.

        :param filename: String of the filename.
        :param datetime: datetime object.
        :returns: String of the date part of the given parameter.
        """

        assert filename is None or datetime is None, \
            "Only one argument may be provided."
        if filename:
            newName = filename.replace("QCLCD", '')
            newName = newName.replace(".zip", '')
            return newName
        if datetime:
            return datetime.strftime('%Y-%m-%d')
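    # Worked example (illustrative): datePart(filename = 'QCLCD201401.zip')
    # strips the "QCLCD" prefix and the ".zip" suffix and returns '201401',
    # which getKeepList() below parses with the "%Y%m" format.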


    def getLastDateLoaded(self, cursor):
        """
        Return the last date of loaded weather data.

        :returns: Last date.
        """

        sql = """select wban, datetime, record_type from "%s"
                 ORDER BY datetime desc limit 1""" % WEATHER_DATA_TABLE

        self.dbUtil.executeSQL(cursor, sql)
        row = cursor.fetchone()
        # self.logger.log('Date last loaded = %s' % row[1], 'info')
        return row[1]


    def getKeepList(self, fileList, cursor):
        """
        The Keep List is the list of filenames of files containing data that are
        *within* the month of the last loaded date or are beyond the last loaded
        date.

        :param fileList: A list of files containing weather data.
        :param cursor: DB cursor.
        :returns: List of weather data filenames to process.
        """

        keepList = []
        i = 0
        for date in fileList:
            self.logger.log('Examining date %s.' % date)

            # The list date should be the last day of the month.
            # It is the date that is compared against the last retrieved date.

            listDate = dt.datetime.strptime(self.datePart(filename = date),
                                            "%Y%m")
            lastDay = calendar.monthrange(listDate.year, listDate.month)[1]
            listDate = dt.datetime.strptime(
                '%s-%s-%s' % (listDate.year, listDate.month, lastDay),
                "%Y-%m-%d")
            self.logger.log('List date = %s.' % listDate)
            lastDate = self.getLastDateLoaded(cursor)

            self.logger.log('last date = %s' % lastDate)

            if lastDate <= listDate:
                keepList.append((i, listDate))

            i += 1

        if keepList:
            keepList.sort()

        return [fileList[d[0]] for d in keepList]
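    # Worked example (illustrative): if the last loaded date is 2014-01-15,
    # a file named 'QCLCD201401.zip' is kept because its list date (the last
    # day of that month, 2014-01-31) is not earlier than the last loaded
    # date, while files for earlier months are dropped.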


if __name__ == '__main__':
    processCommandLineArguments()
    logger = SEKLogger(__name__)
    inserter = SingleFileLoader(COMMAND_LINE_ARGS.filepath)
    if COMMAND_LINE_ARGS.skipNewDataCheck:
        logger.log('result = {}'.format(inserter.insertDataFromFile()))
    elif inserter.newDataForMeterExists():
        logger.log('result = {}'.format(inserter.insertDataFromFile()))
    else:
        logger.log('no new data')
        for line in csv.reader(csvFile, delimiter = ","):
            if lineCnt != 0: # Skip header.
                data = line[0:len(cols)] # Overshoot columns to get the last column.

                for i in range(0, len(cols)):
                    if len(data[i]) == 0:
                        data[i] = 'NULL'
                    else:
                        # Escape single quotes with double single quotes in
                        # PostgreSQL.
                        data[i] = data[i].replace("'", "''")
                        data[i] = "'" + data[i] + "'"

                sql = """INSERT INTO "MeterLocationHistory" (%s) VALUES (%s)""" % (
                    ','.join(cols), ','.join(data))
                logger.log("SQL: %s" % sql, 'debug')
                success = dbUtil.executeSQL(cur, sql)
                if not success:
                    anyFailure = True

            lineCnt += 1

    conn.commit()

    msg = ("Processed %s lines.\n" % lineCnt)
    sys.stderr.write(msg)
    msgBody += msg

    if not anyFailure:
        msg = "Finished inserting Meter Location History records.\n"
        sys.stderr.write(msg)
class SingleFileLoader(object):
    """
    Perform insertion of data contained in a single file to the Smart Inverter database
    specified in the configuration file.
    """
    def __init__(self, filepath=''):
        """
        Constructor.

        :param filepath: Full path of the source data file to be loaded.
        """

        self.logger = SEKLogger(__name__, DEBUG)
        self.configer = SIConfiger()
        self.dbUtil = SEKDBUtil()
        self.dataUtil = SIDataUtil()
        self.logger.log('making new db conn for filepath {}'.format(filepath),
                        SILENT)
        sys.stdout.flush()

        try:
            self.conn = SEKDBConnector(
                dbName=self.configer.configOptionValue('Database', 'db_name'),
                dbHost=self.configer.configOptionValue('Database', 'db_host'),
                dbPort=self.configer.configOptionValue('Database', 'db_port'),
                dbUsername=self.configer.configOptionValue(
                    'Database', 'db_username'),
                dbPassword=self.configer.configOptionValue(
                    'Database', 'db_password')).connectDB()
        except:
            raise Exception("Unable to get DB connection.")
        self.cursor = self.conn.cursor()
        self.exitOnError = False

        # An empty file path is used during creation of meter table entries.
        if filepath == '':
            self.filepath = None
            self.meterID = None
            self.meterDataTable = None
        else:
            self.filepath = filepath
            self.meterID = self.getOrMakeMeterID(self.meterName())
            assert self.meterID is not None
            self.meterDataTable = "MeterData_{}".format(self.meterName())
            # @todo Test existence of meter data table.
        self.timestampColumn = 0  # timestamp col in the raw data
        self.exceptionCount = 0

    def newDataForMeterExists(self):
        """
        :return: Boolean true if file has new data.
        """

        try:
            if (self.dataUtil.maxTimeStamp(self.filepath) >
                    self.dataUtil.maxTimeStampDB(self.meterName())):
                return True
            return False
        except TypeError as detail:
            # @todo Log the cause of the exception.
            self.logger.log('Exception: {}'.format(detail), CRITICAL)
            self.exceptionCount += 1
            return False

    def insertDataFromFile(self):
        """
        Process input file as a stream from the object attribute's filepath.
        :return: (Int, Int) Tuple of the count of inserted records (None on
        error) and the count of exceptions encountered.
        """

        insertCnt = 0
        with open(self.filepath) as dataFile:
            lineCnt = 1
            result = False

            # @todo handle io errors
            self.logger.log('loading data from {}'.format(dataFile), DEBUG)
            for line in dataFile:
                result = self.insertData(
                    line.rstrip('\n')) if lineCnt != 1 else False
                if result is None:
                    self.logger.log('Critical insert failure', CRITICAL)
                    raise Exception('Insert did not complete')
                    # self.logger.log('insert did not complete', ERROR)
                    # return None
                if insertCnt > 0 and insertCnt % COMMIT_INTERVAL == 0:
                    self.conn.commit()
                    self.logger.log('committing at {}'.format(insertCnt),
                                    DEBUG)
                    sys.stdout.flush()
                if result:
                    insertCnt += 1
                lineCnt += 1
            self.conn.commit()
            self.logger.log('final commit at {}'.format(insertCnt), DEBUG)
        return (insertCnt, self.exceptionCount)
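    # Illustrative note: a commit is issued roughly every COMMIT_INTERVAL
    # successful inserts (the constant is defined elsewhere in the module),
    # and a final commit is issued once the file has been fully processed.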

    def insertData(self, values, commitOnEvery=False):
        """
        Insert a row of data to the database.
        :param values: String of raw values from the source CSV files.
        :return: Boolean indicating success or failure.
        """

        if not values or self.dataUtil.badData(values):
            return False

        if self.removeDupe(values):
            self.logger.log('duplicate found', DEBUG)

        sql = 'INSERT INTO "{0}" ({1}) VALUES({2}, {3})'.format(
            self.meterDataTable,
            ','.join("\"" + c + "\"" for c in self.dataUtil.dbColumns),
            self.meterID, self.dataUtil.sqlFormattedValues(values))
        self.logger.log('sql: {}'.format(sql), DEBUG)

        if self.dbUtil.executeSQL(self.cursor,
                                  sql,
                                  exitOnFail=self.exitOnError):
            if commitOnEvery:
                self.conn.commit()
            return True
        else:
            return False

    def removeDupe(self, values):
        def deleteDupe(myMeterID, myTimeUTC):
            sql = 'DELETE FROM "{0}" WHERE meter_id = {1} AND time_utc = {' \
                  '2}'.format(self.meterDataTable, myMeterID, myTimeUTC)
            if self.dbUtil.executeSQL(self.cursor,
                                      sql,
                                      exitOnFail=self.exitOnError):
                return True
            else:
                return False

        if not values:
            return False

        timeUTC = self.timeUTC(values)

        # This is dependent on the quote style used for time UTC in the
        # raw data.
        sql = 'SELECT time_utc FROM "{0}" WHERE meter_id = {1} AND time_utc = ' \
              '{2}'.format(
            self.meterDataTable, self.meterID, timeUTC)

        if self.dbUtil.executeSQL(self.cursor,
                                  sql,
                                  exitOnFail=self.exitOnError):
            rows = self.cursor.fetchone()

            if rows and len(rows) == 1:
                if deleteDupe(self.meterID, timeUTC):
                    return True
                else:
                    raise Exception(
                        "Unable to remove dupe for meter ID {}, time UTC {}".
                        format(self.meterID, timeUTC))
        return False

    def timeUTC(self, values):
        def makeSingleQuotes(x):
            return str(x).replace('"', "'")

        return makeSingleQuotes(values.split(',')[self.timestampColumn])
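    # Worked example (illustrative): for a raw CSV line such as
    #   '"2014-01-02 11:59:00",1.23,4.56'
    # timeUTC() returns "'2014-01-02 11:59:00'", i.e. the first field with
    # its double quotes converted to single quotes for use in SQL.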

    def meterName(self):
        """
        The meter name is the name of the containing folder.
        :return:
        """

        # @todo validate meter name
        def validMeterName(name):
            pass

        return os.path.basename(os.path.dirname(self.filepath))

    def getOrMakeMeterID(self, meterName):
        """
        Given a meter name, return its meter ID.
        If the meter name has no ID, create a new one and return its ID.
        :param meterName: String
        :return: Int of meter ID
        """
        def __meterID(name):
            """
            :param name: String of meter name
            :return: Int or None
            """
            sql = 'SELECT meter_id FROM "Meters" WHERE meter_name = \'{' \
                  '}\''.format(name)
            success = self.dbUtil.executeSQL(self.cursor,
                                             sql,
                                             exitOnFail=False)
            if success:
                result = self.cursor.fetchall()
                assert len(result) == 1 or len(result) == 0
                if result:
                    return int(result[0][0])
                else:
                    return None
            else:
                return None

        def __makeNewMeter(name):
            """
            :param name: String of meter name
            :return: Int or None
            """
            id = __meterID(name)
            if id:
                return id

            self.logger.log('making new meter', DEBUG)
            sql = 'INSERT INTO "Meters" (meter_name) VALUES (\'{}\')'.format(
                name)
            success = self.dbUtil.executeSQL(self.cursor,
                                             sql,
                                             exitOnFail=False)
            self.conn.commit()
            if success:
                sql = 'SELECT CURRVAL(\'meter_id_seq\')'
                success = self.dbUtil.executeSQL(self.cursor,
                                                 sql,
                                                 exitOnFail=False)
                if success:
                    return int(self.cursor.fetchall()[0][0])
            else:
                return None

        id = __meterID(meterName)

        # Python 3: if isinstance( id, int ):
        if isinstance(id, (int, long)):
            return int(id)
        else:
            return __makeNewMeter(meterName)

    def __del__(self):
        self.logger.log('Destroying single file inserter', DEBUG)
        self.conn.close()
class MSGDBExporterTester(unittest.TestCase):
    """
    Unit tests for the MSG Cloud Exporter.
    """


    def setUp(self):
        self.logger = SEKLogger(__name__, 'DEBUG')
        self.configer = MSGConfiger()
        self.exporter = MSGDBExporter()
        self.testDir = 'db_exporter_test'
        self.uncompressedTestFilename = 'meco_v3_test_data.sql'
        self.compressedTestFilename = 'meco_v3_test_data.sql.gz'
        self.exportTestDataPath = self.configer.configOptionValue('Testing',
                                                                  'export_test_data_path')
        self.fileUtil = MSGFileUtil()
        self.fileChunks = []
        self.testDataFileID = ''
        self.pyUtil = MSGPythonUtil()
        self.timeUtil = MSGTimeUtil()

        conn = None
        try:
            conn = MSGDBConnector().connectDB()
        except Exception as detail:
            self.logger.log("Exception occurred: {}".format(detail), 'error')
            exit(-1)

        self.logger.log("conn = {}".format(conn), 'debug')
        self.assertIsNotNone(conn)

        # Create a temporary working directory.
        try:
            os.mkdir(self.testDir)
        except OSError as detail:
            self.logger.log(
                'Exception during creation of temp directory: %s' % detail,
                'ERROR')


    def tearDown(self):
        """
        Delete all test items.
        """

        REMOVE_TEMPORARY_FILES = True
        if REMOVE_TEMPORARY_FILES:
            try:
                self.logger.log("Removing local test files {}, {}.".format(
                    self.uncompressedTestFilename, self.compressedTestFilename),
                                'debug')
                os.remove(os.path.join(os.getcwd(), self.testDir,
                                       self.uncompressedTestFilename))
                os.remove(os.path.join(os.getcwd(), self.testDir,
                                       self.compressedTestFilename))
            except OSError as detail:
                self.logger.log(
                    'Exception while removing temporary files: {}'.format(
                        detail), 'SILENT')
            try:
                os.remove(os.path.join(os.getcwd(), self.testDir,
                                       self.compressedTestFilename))
            except OSError as detail:
                self.logger.log(
                    'Exception while removing temporary files: {}'.format(
                        detail), 'SILENT')
            try:
                for f in self.fileChunks:
                    os.remove(f)
            except OSError as detail:
                self.logger.log(
                    'Exception while removing temporary files: {}'.format(
                        detail), 'DEBUG')

        try:
            os.rmdir(self.testDir)
        except OSError as detail:
            self.logger.log(
                'Exception while removing directory: {}'.format(detail),
                'ERROR')

        # Keep deleting from the cloud until there are no more to delete.
        def deleteFromCloud():
            self.logger.log("deleting from cloud", 'debug')
            try:
                fileIDToDelete = self.exporter.fileIDForFileName(
                    self.compressedTestFilename)
                if fileIDToDelete is None:
                    return
                self.logger.log("file ID to delete: {}".format(fileIDToDelete),
                                'DEBUG')
                self.exporter.driveService.files().delete(
                    fileId = '{}'.format(fileIDToDelete)).execute()
                deleteFromCloud()
            except (TypeError, http.HttpError) as e:
                self.logger.log('Delete not successful: {}'.format(e), 'DEBUG')


        deleteFromCloud()


    def _upload_test_data_to_cloud(self):
        """
        Provide an upload of test data that can be used in other tests.

        Side effect: Store the file ID as an ivar.
        """
        self.logger.log("Uploading test data for caller: {}".format(
            self.pyUtil.callerName()))

        filePath = "{}/{}".format(self.exportTestDataPath,
                                  self.compressedTestFilename)
        self.logger.log('Uploading {}.'.format(filePath), 'info')

        uploadResult = self.exporter.uploadFileToCloudStorage(filePath)
        self.logger.log('upload result: {}'.format(uploadResult))

        self.testDataFileID = self.exporter.fileIDForFileName(
            self.compressedTestFilename)
        self.logger.log("Test file ID is {}.".format(self.testDataFileID))


    def test_markdown_list_of_downloadable_files(self):
        """
        Match the Markdown line entry for the uploaded file.
        """
        # @REVIEWED
        self._upload_test_data_to_cloud()
        self.assertEquals(len(filter(lambda x: self.testDataFileID in x,
                                     self.exporter.markdownListOfDownloadableFiles().splitlines())),
                          1)


    def test_get_md5_sum_from_cloud(self):
        """
        Test retrieving the MD5 sum from the cloud.
        """
        # @REVIEWED
        self.logger.log('Testing getting the MD5 sum.', 'info')
        self._upload_test_data_to_cloud()
        testFileMD5 = filter(lambda x: x['id'] == self.testDataFileID,
                             self.exporter.cloudFiles['items'])[0][
            'md5Checksum']
        self.assertEquals(len(testFileMD5), 32)
        self.assertTrue(re.match(r'[0-9A-Za-z]+', testFileMD5))


    def test_get_file_id_for_nonexistent_file(self):
        """
        Test getting a file ID for a nonexistent file.
        """
        # @REVIEWED
        fileIDs = self.exporter.fileIDForFileName('nonexistent_file')
        self.logger.log("file ids = {}".format(fileIDs), 'info')
        self.assertIsNone(fileIDs)


    def test_upload_test_data(self):
        """
        Upload a test data file for unit testing of DB export.

        The unit test data file is a predefined set of test data stored in
        the test data path of the software distribution.
        """
        # @REVIEWED

        self._upload_test_data_to_cloud()
        self.assertGreater(len(self.testDataFileID), 0)
        self.assertTrue(re.match(r'[0-9A-Za-z]+', self.testDataFileID))


    def test_adding_reader_permissions(self):
        """
        Add reader permissions to a file that was uploaded.
        """
        # @REVIEWED
        self.logger.log("Testing adding reader permissions.")
        self._upload_test_data_to_cloud()

        email = self.configer.configOptionValue('Testing',
                                                'tester_email_address')
        service = self.exporter.driveService
        try:
            id_resp = service.permissions().getIdForEmail(
                email = email).execute()
            print id_resp

        except errors.HttpError as detail:
            print 'Exception while getting ID for email: {}'.format(detail)

        new_permission = {'value': email, 'type': 'user', 'role': 'reader'}
        try:
            self.logger.log('Adding reader permission', 'INFO')
            fileIDToAddTo = self.testDataFileID

            # The permission dict is being output to stdout here.
            resp = service.permissions().insert(fileId = fileIDToAddTo,
                                                sendNotificationEmails = False,
                                                body = new_permission).execute()
        except errors.HttpError as detail:
            self.logger.log(
                'Exception while adding reader permissions: {}'.format(detail),
                'error')


        def permission_id(email):
            try:
                id_resp = service.permissions().getIdForEmail(
                    email = email).execute()
                return id_resp['id']
            except errors.HttpError as error:
                self.logger.log("HTTP error: {}".format(error))


        permission = {}
        try:
            permission = service.permissions().get(fileId = self.testDataFileID,
                                                   permissionId = permission_id(
                                                       email)).execute()
        except errors.HttpError as error:
            self.logger.log("HTTP error: {}".format(error))

        self.assertEquals(permission['role'], 'reader')


    def test_create_compressed_archived(self):
        """
        * Copy test data to a temp directory (self.testDir).
        * Create a checksum for test data.
        * Create a gzip-compressed archive.
        * Extract gzip-compressed archive.
        * Create a checksum for the uncompressed data.
        * Compare the checksums.
        """
        # @REVIEWED

        self.logger.log('Testing verification of a compressed archive.')

        self.logger.log('cwd {}'.format(os.getcwd()))
        fullPath = os.path.join(os.getcwd(), self.testDir,
                                self.uncompressedTestFilename)
        shutil.copyfile('{}/{}'.format(self.exportTestDataPath,
                                       self.uncompressedTestFilename), fullPath)

        md5sum1 = self.fileUtil.md5Checksum(fullPath)

        self.exporter.fileUtil.gzipCompressFile(fullPath)

        try:
            os.remove(os.path.join(os.getcwd(), self.testDir,
                                   self.uncompressedTestFilename))
        except OSError as detail:
            self.logger.log('Exception while removing: {}'.format(detail),
                            'ERROR')

        # Extract archived data and generate checksum.
        src = gzip.open('{}{}'.format(fullPath, '.gz'), "rb")
        uncompressed = open(fullPath, "wb")
        uncompressed.write(src.read())
        uncompressed.close()
        src.close()

        md5sum2 = self.fileUtil.md5Checksum(fullPath)

        self.assertEqual(md5sum1, md5sum2,
                         'Checksums are not equal for original and new '
                         'decompressed archive.')


    def test_export_db(self):
        """
        Perform a quick test of the DB export method using Testing Mode.

        This requires sudo authorization to complete.
        """
        # @REVIEWED

        self.logger.log('Testing exportDB using the testing DB.')

        # @todo handle case where testing db does not exist.

        dbs = ['test_meco']
        ids = self.exporter.exportDBs(databases = dbs, toCloud = True,
                                      localExport = True)
        self.logger.log('Count of exports: {}'.format(len(ids)))
        self.assertEquals(len(ids), 1, "Count of exported files is wrong.")

        map(self.exporter.deleteFile, ids)


    def test_split_archive(self):
        """
        Test splitting an archive into chunks.
        """
        # @REVIEWED
        self.logger.log('Testing archive splitting.')
        fullPath = '{}/{}'.format(self.exportTestDataPath,
                                  self.compressedTestFilename)
        self.logger.log('fullpath: {}'.format(fullPath))
        shutil.copyfile(fullPath, '{}/{}'.format(self.testDir,
                                                 self.compressedTestFilename))
        fullPath = '{}/{}'.format(self.testDir, self.compressedTestFilename)

        self.fileChunks = self.fileUtil.splitLargeFile(fullPath = fullPath,
                                                       numChunks = 3)
        self.assertEquals(len(self.fileChunks), 3)


    def test_get_file_size(self):
        """
        Test retrieving local file sizes.
        """
        # @REVIEWED
        fullPath = '{}/{}'.format(self.exportTestDataPath,
                                  self.compressedTestFilename)
        fSize = self.fileUtil.fileSize(fullPath)
        self.logger.log('size: {}'.format(fSize))
        self.assertEqual(fSize, 12279, 'Unexpected file size.')


    def test_upload_export_files_list(self):
        """
        TBW
        """
        pass


    def test_checksum_after_upload(self):
        """
        TBW
        """
        pass


    def test_dump_exclusions_dictionary(self):
        """
        Verify the exclusions dictionary by its type.
        :return:
        """
        # @REVIEWED
        exclusions = self.exporter.dumpExclusionsDictionary()

        if exclusions:
            self.assertEquals(type({}), type(exclusions))


    def test_move_to_final(self):
        """
        Test moving a file to the final destination path.
        """
        # @REVIEWED
        self.logger.log('Testing moving to final path {}.'.format(
            self.configer.configOptionValue('Export', 'db_export_final_path')))

        origCompressedFile = '{}/{}'.format(
            self.configer.configOptionValue('Export', 'export_test_data_path'),
            self.compressedTestFilename)
        newCompressedFile = '{}/{}'.format(
            self.configer.configOptionValue('Export', 'export_test_data_path'),
            'temp_test_file')

        shutil.copyfile(origCompressedFile, newCompressedFile)

        self.exporter.moveToFinalPath(compressedFullPath = newCompressedFile)

        self.assertTrue(os.path.isfile('{}/{}'.format(
            self.configer.configOptionValue('Export', 'db_export_final_path'),
            'temp_test_file')))

        # Remove the test file.
        os.remove('{}/{}'.format(
            self.configer.configOptionValue('Export', 'db_export_final_path'),
            'temp_test_file'))


    def test_log_successful_export(self):
        """
        Test logging of export results to the export history table.
        """
        # @REVIEWED
        self.assertTrue(
            self.exporter.logSuccessfulExport(name = 'test_export',
                                              url = 'http://test_url',
                                              datetime = 0, size = 100))

        conn = MSGDBConnector().connectDB()
        cursor = conn.cursor()
        dbUtil = MSGDBUtil()

        self.assertTrue(
            dbUtil.executeSQL(cursor, 'select * from "ExportHistory" where '
                                      'timestamp = '
                                      'to_timestamp(0)'))

        self.assertEqual(len(cursor.fetchall()), 1,
                         "There should only be one result row.")

        self.assertTrue(
            dbUtil.executeSQL(cursor, 'delete from "ExportHistory" where '
                                      'timestamp = to_timestamp(0)'))
        conn.commit()


    def test_metadata_of_file_id(self):
        """
        Test getting the metadata for a file ID.
        """
        # @REVIEWED
        self._upload_test_data_to_cloud()

        self.logger.log('metadata: {}'.format(
            self.exporter.metadataOfFileID(self.testDataFileID)))

        self.assertTrue(re.match(r'[0-9A-Za-z]+', self.testDataFileID))


    def test_filename_for_file_id(self):
        """
        Test returning a file name given a file ID.
        """
        # @REVIEWED
        self._upload_test_data_to_cloud()
        self.assertEquals(
            self.exporter.filenameForFileID(fileID = self.testDataFileID),
            self.compressedTestFilename)


    def test_outdated_files(self):
        # @REVIEWED
        self._upload_test_data_to_cloud()
        time.sleep(1)
        self.logger.log("outdated:")

        # For debugging:
        for item in self.exporter.outdatedFiles(
                daysBeforeOutdated = datetime.timedelta(days = -1)):
            self.logger.log(
                "name: {}, created date: {}".format(item['originalFilename'],
                                                    item['createdDate']),
                'debug')

        # Get all the outdated files where outdated is equal to anything
        # uploaded today or later.
        self.assertEquals(self.exporter.outdatedFiles(
            daysBeforeOutdated = datetime.timedelta(days = -1))[0]['id'],
                          self.testDataFileID)

        self.logger.log('-----')


    def test_delete_outdated(self):
        """
        TBW
        """
        pass


    def test_list_of_downloadable_files(self):
        """
        Test the list of downloadable files used by the available files page.
        """
        # @REVIEWED
        self._upload_test_data_to_cloud()
        self.assertEquals(len(
            filter(lambda row: row['id'] == self.testDataFileID,
                   self.exporter.listOfDownloadableFiles())), 1,
                          "Test file not present.")


    def test_count_of_db_exports(self):
        count = self.exporter.countOfDBExports(EARLIEST_DATE)
        self.logger.log(count, 'DEBUG')
        self.assertGreaterEqual(int(count), 0)


    def test_count_of_cloud_files(self):
        count = self.exporter.countOfCloudFiles()
        self.assertGreaterEqual(int(count), 0)


    def test_plaintext_list_of_downloadable_files(self):
        """
        This test handles content both with content links and without content
        links.
        """
        content = self.exporter.plaintextListOfDownloadableFiles()
        self.assertRegexpMatches(content,
                                 r'\d+-\d+-\d+.*\,'
                                 r'\s+\d+-\d+-\d+T\d+:\d+:\d+\.\d+Z\,\s+\d+\sB')


    def test_last_report_date(self):
        last_report = self.exporter.notifier.lastReportDate(
            types = MSGNotificationHistoryTypes,
            noticeType = MSGNotificationHistoryTypes.MSG_EXPORT_SUMMARY)
        self.assertTrue(last_report is None or last_report > EARLIEST_DATE)

    def test_current_export_summary(self):
        self.assertRegexpMatches(self.exporter.currentExportSummary(),
                                 re.compile(
                                     'last.*databases.*free.*currently.*accessed.*',
                                     flags = re.IGNORECASE | re.DOTALL))
class MSGDataAggregator(object):
    """
    Use for continuous data aggregation of diverse data types relevant to the
    Maui Smart Grid project.

    Four data types are supported:

    1. Irradiance
    2. Temperature/Humidity (weather)
    3. Circuit
    4. eGauge

    The general data form conforms to

    1. timestamp, subkey_id, val1, val2, val3, ...
    2. timestamp, val1, val2, val3, ...

    Case (2) is handled within the same space as (1) by testing for the
    existence of subkeys.

    Current aggregation consists of averaging over **15-min intervals**.

    Aggregation is performed in-memory and saved to the DB. The time range is
    delimited by start date and end date where the values are included in the
    range. The timestamps for aggregation intervals are the last timestamp in a
    respective series.

    * Aggregation subkeys are values such as eGauge IDs or circuit numbers.

    Aggregation is being implemented externally for performance and flexibility
    advantages over alternative approaches such as creating a view. It may be
    rolled into an internal function at a future time if that proves to be
    beneficial.

    Usage:

        from msg_data_aggregator import MSGDataAggregator
        aggregator = MSGDataAggregator()

    API:

        aggregateAllData(dataType = dataType)

        aggregateNewData(dataType = dataType)

    """
    def __init__(self,
                 exitOnError=True,
                 commitOnEveryInsert=False,
                 testing=False):
        """
        Constructor.

        :param testing: if True, the testing DB will be connected instead of
        the production DB.
        """

        self.logger = SEKLogger(__name__, 'info')
        self.configer = MSGConfiger()
        self.conn = MSGDBConnector(testing=testing).connectDB()
        self.cursor = self.conn.cursor()
        self.dbUtil = MSGDBUtil()
        self.notifier = MSGNotifier()
        self.mathUtil = MSGMathUtil()
        self.timeUtil = MSGTimeUtil()
        self.nextMinuteCrossing = {}
        self.nextMinuteCrossingWithoutSubkeys = None
        self.exitOnError = exitOnError
        self.commitOnEveryInsert = commitOnEveryInsert
        section = 'Aggregation'
        tableList = [
            'irradiance', 'agg_irradiance', 'weather', 'agg_weather',
            'circuit', 'agg_circuit', 'egauge', 'agg_egauge'
        ]
        self.dataParams = {
            'weather': ('agg_weather', 'timestamp', ''),
            'egauge': ('agg_egauge', 'datetime', 'egauge_id'),
            'circuit': ('agg_circuit', 'timestamp', 'circuit'),
            'irradiance': ('agg_irradiance', 'timestamp', 'sensor_id')
        }
        self.columns = {}

        # tables[datatype] gives the table name for datatype.
        self.tables = {
            t: self.configer.configOptionValue(section, '{}_table'.format(t))
            for t in tableList
        }

        for t in self.tables.keys():
            self.logger.log('t:{}'.format(t), 'DEBUG')
            try:
                self.columns[t] = self.dbUtil.columnsString(
                    self.cursor, self.tables[t])
            except TypeError as error:
                self.logger.log(
                    'Ignoring missing table: Error is {}.'.format(error),
                    'error')

    def existingIntervals(self, aggDataType='', timeColumnName=''):
        """
        Retrieve the existing aggregation intervals for the given data type.

        :param aggDataType: string
        :param timeColumnName: string
        :return: List of intervals.
        """

        return [
            x[0] for x in self.rows(
                """SELECT {0} from \"{1}\" ORDER BY {2}""".format(
                    timeColumnName, self.tables[aggDataType], timeColumnName))
        ]

    def unaggregatedIntervalCount(self,
                                  dataType='',
                                  aggDataType='',
                                  timeColumnName='',
                                  idColumnName=''):
        """
        Return count of unaggregated intervals for a given data type.
        :param dataType:
        :param aggDataType:
        :param timeColumnName:
        :param idColumnName:
        :return: int
        """

        return len(
            self.unaggregatedEndpoints(dataType, aggDataType, timeColumnName,
                                       idColumnName))

    def lastAggregationEndpoint(self, aggDataType='', timeColumnName=''):
        """
        Last aggregation endpoint for a given datatype.

        :param aggDataType: string
        :param timeColumnName:
        :return:
        """

        return self.existingIntervals(aggDataType=aggDataType,
                                      timeColumnName=timeColumnName)[-1]

    def unaggregatedEndpoints(self,
                              dataType='',
                              aggDataType='',
                              timeColumnName='',
                              idColumnName=''):
        """
        Sorted (ascending) endpoints and their IDs, if available,
        for unaggregated intervals since the last aggregation endpoint for a
        given data type.

        This has a problem where an endpoint at 23:45:04 will be returned as
        23:45:00. This makes the return value incorrect for raw data types
        having readings at sub-minute intervals such as data for circuit,
        irradiance and weather. This condition does not affect correct
        aggregation. Only the definition of the return value is wrong.

        :param dataType: string
        :param aggDataType: string
        :param timeColumnName: string
        :param idColumnName: string
        :return: list of datetimes.
        """

        if idColumnName != '':
            # Key:
            # 0: raw
            # 1: agg
            # 2: time col
            # 3: id col
            # 4: last aggregated time
            sql = 'SELECT "{0}".{2}, "{0}".{3} FROM "{0}" LEFT JOIN "{1}" ON ' \
                  '"{0}".{2} = "{1}".{2} AND "{0}".{3} = "{1}".{3} WHERE "{' \
                  '1}".{2} IS NULL AND "{0}".{2} > \'{4}\' ORDER BY {2} ASC, ' \
                  '{3} ASC'

            self.logger.log('last agg endpoint: {}'.format(
                self.lastAggregationEndpoint(aggDataType, timeColumnName)))

            # The id column value is available in the tuple returned by
            # groupby but is not being used here.

            # @todo Exclude last endpoint if it is equal to the last
            # aggregation endpoint.
            #
            # The minute position filtering may be including the last
            # endpoint incorrectly because there are readings occurring
            # within the same minute as the final endpoint, e.g. 23:45:04,
            # 23:45:08, etc.
            #
            # This is not a problem with eGauge data because its reading
            # intervals fall on every minute with zero seconds.

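            # Pipeline, reading inside-out: fetch unaggregated (time, id) rows
            # newer than the last aggregation endpoint, keep rows whose minute
            # falls on an aggregation boundary (MINUTE_POSITION and
            # INTERVAL_DURATION are constants defined elsewhere), truncate each
            # timestamp to minute precision, collapse duplicates with groupby,
            # and rebuild datetimes with the seconds set to zero.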
            return map(lambda x: datetime(x[0], x[1], x[2], x[3], x[4], 0), [
                k for k, v in groupby(
                    map(
                        lambda y: y[0].timetuple()[0:5],
                        filter(
                            lambda x: x[0].timetuple()[MINUTE_POSITION] %
                            INTERVAL_DURATION == 0, [(
                                x[0], x[1]) for x in self.rows(
                                    sql.format(
                                        self.tables[dataType],
                                        self.tables[aggDataType],
                                        timeColumnName, idColumnName,
                                        self.lastAggregationEndpoint(
                                            aggDataType, timeColumnName)))])))
            ])
        else:
            # Key:
            # 0: raw
            # 1: agg
            # 2: time col
            # 3: last aggregated time
            sql = 'SELECT "{0}".{2} FROM "{0}" LEFT JOIN "{1}" ON "{0}".{2}=' \
                  '"{1}".{2} WHERE "{1}".{2} IS NULL AND "{0}".{2} > \'{3}\' ' \
                  'ORDER BY {2} ASC'

            self.logger.log('last agg endpoint: {}'.format(
                self.lastAggregationEndpoint(aggDataType, timeColumnName)))

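            # Same pipeline as the subkey case above, but each row is a bare
            # timestamp rather than a (timestamp, id) tuple.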
            return map(lambda x: datetime(x[0], x[1], x[2], x[3], x[4], 0), [
                k for k, v in groupby(
                    map(
                        lambda y: y.timetuple()[0:5],
                        filter(
                            lambda x: x.timetuple()[MINUTE_POSITION] %
                            INTERVAL_DURATION == 0, [(x[0]) for x in self.rows(
                                sql.format(
                                    self.tables[dataType],
                                    self.tables[aggDataType], timeColumnName,
                                    self.lastAggregationEndpoint(
                                        aggDataType, timeColumnName)))])))
            ])

    def intervalCrossed(self, minute=None, subkey=None):
        """
        Determine interval crossing. Intervals are at 0, 15, 30, 45, 60 min.
        The interval size is determined by MECO source data.

        :param minute: The integer value of the minute.
        :param subkey: The name for the subkey used for aggregation.
        :returns: True if an interval was crossed, False otherwise.
        """

        if minute is None:
            raise Exception('Minute not defined.')

        intervalSize = 15
        first = 0
        last = 60

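        # The next expected crossing (tracked per subkey, or as a single value
        # in the no-subkey case) acts as a small state machine: whenever the
        # current minute reaches it, advance it by one interval, wrapping back
        # to 0 at the end of the hour.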
        if subkey is not None:
            if (self.nextMinuteCrossing[subkey] <= minute <= last
                    and self.nextMinuteCrossing[subkey] != first):
                self.nextMinuteCrossing[subkey] += intervalSize
                if self.nextMinuteCrossing[subkey] >= last:
                    self.nextMinuteCrossing[subkey] = first
                self.logger.log('minute crossed at #1.', 'debug')
                return True
            elif (self.nextMinuteCrossing[subkey] == first
                  and first <= minute <= intervalSize):
                self.nextMinuteCrossing[subkey] = intervalSize
                self.logger.log('minute crossed at #2.', 'debug')
                return True
            return False
        else:
            if (self.nextMinuteCrossingWithoutSubkeys <= minute <= last
                    and self.nextMinuteCrossingWithoutSubkeys != first):
                self.nextMinuteCrossingWithoutSubkeys += intervalSize
                if self.nextMinuteCrossingWithoutSubkeys >= last:
                    self.nextMinuteCrossingWithoutSubkeys = first
                self.logger.log('minute crossed at #3.', 'debug')
                return True
            elif (self.nextMinuteCrossingWithoutSubkeys == first
                  and first <= minute <= intervalSize):
                self.nextMinuteCrossingWithoutSubkeys = intervalSize
                self.logger.log('minute crossed at #4.', 'debug')
                return True
            return False

    def rows(self, sql):
        """
        Rows from a SQL fetch.

        :param sql: Command to be executed.
        :returns: DB result set.
        """

        self.logger.log('sql: {}'.format(sql), 'debug')
        self.dbUtil.executeSQL(self.cursor, sql)
        return self.cursor.fetchall()

    def rawData(self,
                dataType='',
                orderBy=None,
                timestampCol='',
                startDate='',
                endDate=''):
        """
        Raw data to be aggregated.

        :param dataType: string
        :param orderBy: list
        :param timestampCol: string
        :param startDate: string
        :param endDate: string
        :returns: DB rows.
        """

        # @todo Validate args.

        orderBy = filter(None, orderBy)

        return self.rows("""SELECT {} FROM "{}" WHERE {} BETWEEN '{}' AND
        '{}' ORDER BY {}""".format(self.columns[dataType],
                                   self.tables[dataType], timestampCol,
                                   startDate, endDate, ','.join(orderBy)))

    def subkeys(self,
                dataType='',
                timestampCol='',
                subkeyCol='',
                startDate='',
                endDate=''):
        """
        The distinct subkeys for a given data type within a time range.

        Subkeys are fields such as egauge_id in eGauge data or sensor_id in
        irradiance data.

        :param dataType: string
        :param timestampCol: string
        :param subkeyCol: string
        :param startDate: string
        :param endDate: string
        :returns: List of subkeys
        """

        return [
            sk[0] for sk in self.rows("""SELECT DISTINCT({}) FROM "{}"
        WHERE {} BETWEEN '{}' AND '{}'
            ORDER BY {}""".format(subkeyCol, self.tables[dataType],
                                  timestampCol, startDate, endDate, subkeyCol))
        ]

    def insertAggregatedData(self, agg=None):
        """
        :param agg: MSGAggregatedData
        :return: None
        """

        if not agg.columns:
            raise Exception('agg columns not defined.')
        if not agg.data:
            raise Exception('agg data not defined.')

        self.logger.log('agg data: {}'.format(agg.data))
        self.logger.log('agg data type: {}'.format(type(agg.data)))

        def __insertData(values=''):
            """
            Perform insert of data to the database using the given values.
            :param values: String containing values to be inserted.
            :return Nothing.
            """
            sql = 'INSERT INTO "{0}" ({1}) VALUES( {2})'.format(
                self.tables[agg.aggregationType], ','.join(agg.columns),
                values)
            self.logger.log('sql: {}'.format(sql), 'debug')
            success = self.dbUtil.executeSQL(self.cursor,
                                             sql,
                                             exitOnFail=self.exitOnError)

            # Used for a special case where data is reloaded.
            if self.commitOnEveryInsert:
                self.conn.commit()
            if not success and self.exitOnError:
                raise Exception('Failure during aggregated data insert.')

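        # agg.data rows are either dicts keyed by subkey (each value being a
        # list of column values) or plain lists; both are serialized into a
        # SQL VALUES string with strings and datetimes quoted and 'NULL'
        # passed through as-is.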
        for row in agg.data:
            if type(row) == type({}):
                # self.logger.log('row=%s' % row, 'debug')
                # self.logger.log('row type: %s' % type(row))

                for key in row.keys():
                    values = ''
                    valCnt = 0
                    for val in row[key]:
                        if val == 'NULL':
                            values += val
                        elif type(val) == type(''):
                            values += "'" + val.strip() + "'"
                        elif isinstance(val, datetime):
                            values += "'" + val.isoformat() + "'"
                        elif type(val) == type(0):
                            values += str(val)
                        elif type(val) == type(0.0):
                            values += str(val)
                        else:
                            values += val
                        if valCnt < len(agg.columns) - 1:
                            values += ","
                        valCnt += 1
                    __insertData(values=values)

            elif type(row) == type([]):
                values = ''
                valCnt = 0
                for val in row:
                    if val == 'NULL':
                        values += val
                    elif type(val) == type(''):
                        values += "'" + val.strip() + "'"
                    elif isinstance(val, datetime):
                        values += "'" + val.isoformat() + "'"
                    elif type(val) == type(0):
                        values += str(val)
                    elif type(val) == type(0.0):
                        values += str(val)
                    else:
                        values += val
                    if valCnt < len(agg.columns) - 1:
                        values += ","
                    valCnt += 1
                __insertData(values=values)
            else:
                self.logger.log('row = {}'.format(row), 'error')
                raise Exception('Row type not matched.')

        # End for row.
        self.conn.commit()

    def intervalAverages(self,
                         sums,
                         cnts,
                         timestamp,
                         timestampIndex,
                         subkeyIndex=None,
                         subkey=None):
        """
        Aggregates all data for the current interval for the given subkey.

        For the case where there are no subkeys, subkeyIndex and subkey
        should be None.

        :param sums: list
        :param cnts: list
        :param timestamp: datetime
        :param timestampIndex: int
        :param subkeyIndex: int
        :param subkey: string
        :returns: Averaged data as a dict with form {subkey:data}
        """

        if subkey is not None:
            myAvgs = {}
            reportedAgg = False
            myAvgs[subkey] = []
            sumIndex = 0

            self.logger.log('key: {}'.format(subkey), 'debug')
            # Iterate over sums.
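            # The timestamp and subkey positions are filled from the
            # arguments; every other column becomes sum / count, or 'NULL'
            # when the interval had no readings for that column.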
            for s in sums[subkey]:
                if sumIndex == timestampIndex:
                    myAvgs[subkey].append(timestamp)
                elif sumIndex == subkeyIndex:
                    myAvgs[subkey].append(subkey)
                else:
                    if cnts[subkey][sumIndex] != 0:
                        if not reportedAgg:
                            self.logger.log(
                                'Aggregating {} rows of data.'.format(
                                    cnts[subkey][sumIndex]), 'debug')
                            reportedAgg = True

                        myAvgs[subkey].append(s / cnts[subkey][sumIndex])
                    else:
                        myAvgs[subkey].append('NULL')
                sumIndex += 1
            return myAvgs
        else:
            myAvgs = []
            reportedAgg = False
            sumIndex = 0
            for s in sums:
                if sumIndex == timestampIndex:
                    myAvgs.append(timestamp)
                else:
                    if cnts[sumIndex] != 0:
                        if not reportedAgg:
                            self.logger.log(
                                'Aggregating {} rows of data.'.format(
                                    cnts[sumIndex]), 'debug')
                            reportedAgg = True
                        myAvgs.append(s / cnts[sumIndex])
                    else:
                        myAvgs.append('NULL')
                sumIndex += 1
            return myAvgs

    def dataParameters(self, dataType=''):
        """
        Parameters for a given data type.
        :param dataType: string
        :return: (aggType, timeColName, subkeyColName)
        """
        try:
            assert len(self.dataParams[dataType]) == 3
            return self.dataParams[dataType]
        except (KeyError, AssertionError):
            self.logger.log('Unmatched data type {}.'.format(dataType),
                            'error')

    def aggregateAllData(self, dataType=''):
        """
        Convenience method for aggregating all data for a given data type.
        Data is inserted to individual aggregated data tables.
        :param dataType: String in the list of raw data types.
        :return: Nothing.
        """
        (aggType, timeColName, subkeyColName) = self.dataParameters(dataType)

        for start, end in self.monthStartsAndEnds(timeColumnName=timeColName,
                                                  dataType=dataType):
            self.logger.log('start, end: {}, {}'.format(start, end))
            aggData = self.aggregatedData(
                dataType=dataType,
                aggregationType=aggType,
                timeColumnName=timeColName,
                subkeyColumnName=subkeyColName,
                startDate=start.strftime('%Y-%m-%d %H:%M:%S'),
                endDate=end.strftime('%Y-%m-%d %H:%M:%S'))
            self.insertAggregatedData(agg=aggData)
            for row in aggData.data:
                self.logger.log('aggData row: {}'.format(row))

    def aggregateNewData(self, dataType=''):
        """
        Convenience method for aggregating new data.

        :param dataType:
        :return: dict of {dataType: count of aggregation endpoints}
        """

        # The new aggregation starting point is equal to the last aggregation
        # endpoint up to the last unaggregated endpoint.

        (aggType, timeColName, subkeyColName) = self.dataParameters(dataType)

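        # lastUnaggregatedAndAggregatedEndpoints() returns
        # {dataType: (last unaggregated endpoint, last aggregated endpoint)},
        # hence the (end, start) unpacking order below.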
        (end, start) = \
            self.lastUnaggregatedAndAggregatedEndpoints(dataType).items()[0][1]

        self.logger.log(
            'datatype: {}; start, end: {}, {}; end type: {}'.format(
                dataType, start, end, type(end)), 'critical')

        if end is None:
            # Having no unaggregated endpoints yields an empty endpoint list,
            # and therefore an end of None, for the egauge type. The other
            # types do not reach this case because their fractional-minute
            # readings are not handled completely, but this method still works
            # for them without problems.
            self.logger.log('Nothing to aggregate.')
            return {dataType: 0}

        if self.incrementEndpoint(start) >= end:
            self.logger.log('Nothing to aggregate.')
            return {dataType: 0}

        aggData = self.aggregatedData(
            dataType=dataType,
            aggregationType=aggType,
            timeColumnName=timeColName,
            subkeyColumnName=subkeyColName,
            startDate=self.incrementEndpoint(start).strftime(
                '%Y-%m-%d %H:%M:%S'),
            endDate=end.strftime('%Y-%m-%d %H:%M:%S'))
        self.insertAggregatedData(agg=aggData)
        for row in aggData.data:
            self.logger.log('aggData row: {}'.format(row))

        self.logger.log('{} rows aggregated for {}.'.format(
            len(aggData.data), dataType))
        return {dataType: len(aggData.data)}

    def incrementEndpoint(self, endpoint=None):
        """
        Increment an endpoint by one interval where endpoints are the final
        timestamp in an aggregation interval.
        :param endpoint: the endpoint to be incremented.
        :return: datetime object that is the given endpoint + a predefined
        amount of minutes.
        """
        plusOneInterval = relativedelta(minutes=15)
        return endpoint + plusOneInterval

    def lastUnaggregatedAndAggregatedEndpoints(self, dataType=''):
        """
        Return the endpoints for the given data type in the form

        {datatype: (last unaggregated endpoint, last aggregated endpoint)}.
        :param dataType:
        :return: dict with tuple.
        """
        self.logger.log('datatype {}'.format(dataType))
        (aggType, timeColName, subkeyColName) = self.dataParameters(dataType)
        self.logger.log('subkey colname {}'.format(subkeyColName))

        unAggregatedEndpoints = self.unaggregatedEndpoints(
            dataType=dataType,
            aggDataType=aggType,
            timeColumnName=timeColName,
            idColumnName=subkeyColName)

        self.logger.log('unagg endpoints: {}'.format(unAggregatedEndpoints))
        return {
            dataType:
            (unAggregatedEndpoints[-1]
             if unAggregatedEndpoints != [] else None,
             self.lastAggregationEndpoint(aggDataType=aggType,
                                          timeColumnName=timeColName))
        }

    def aggregatedVsNewData(self):
        """
        Convenience method.
        :return: dict of tuples containing {datatype:(last raw datetime,
        last agg datetime)}
        """
        return {
            x.keys()[0]: (x.values()[0])
            for x in map(self.lastUnaggregatedAndAggregatedEndpoints,
                         [k for k in self.dataParams])
        }

    def monthStartsAndEnds(self, timeColumnName='', dataType=''):
        """
        Return first date and last date for the given **raw** data type for each
        month in the data's entire time range.

        The end date is incremented by one aggregation period to account for
        the data obtained at time 00:00.

        :param timeColumnName: string
        :param dataType: string
        :return: List of tuples.
        """

        self.logger.log('datatype {}'.format(dataType), 'debug')
        (start,
         end) = self.rows("""SELECT MIN({}), MAX({}) FROM \"{}\"""".format(
             timeColumnName, timeColumnName, self.tables[dataType]))[0]
        self.logger.log('start {}'.format(start))
        self.logger.log('end {}'.format(end))

        # End time needs transforming in split dates to extend the end of the
        # day to 23:59:59.

        splitDates = self.timeUtil.splitDates(start, end)

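        # Extend each split range's end to 23:59:59 of its last day and then
        # advance it by one aggregation interval so the 00:00 reading of the
        # following day is included.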
        startEndDatesTransform = []
        i = 0
        while i < len(splitDates):
            startEndDatesTransform.append(
                (splitDates[i][0],
                 self.incrementEndpoint(
                     datetime(splitDates[i][1].timetuple()[0],
                              splitDates[i][1].timetuple()[1],
                              splitDates[i][1].timetuple()[2], 23, 59, 59))))
            i += 1

        return startEndDatesTransform

    def aggregatedData(self,
                       dataType='',
                       aggregationType='',
                       timeColumnName='',
                       subkeyColumnName='',
                       startDate='',
                       endDate=''):
        """
        ***********************************************************************
        Provide aggregated data.
        ***********************************************************************

        Start and end dates are used to calculate interval crossings.

        :param dataType: String
        :param aggregationType: String
        :param timeColumnName: String
        :param subkeyColumnName: String
        :param startDate: String
        :param endDate: String
        :returns: MSGAggregatedData
        """

        aggData = []
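        # ci() maps a column name to its index within a raw-data row, based on
        # the comma-separated column string cached in self.columns.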
        ci = lambda col_name: self.columns[dataType].split(',').index(col_name)

        rowCnt = 0

        mySubkeys = []
        if subkeyColumnName:
            mySubkeys = self.subkeys(dataType=dataType,
                                     timestampCol=timeColumnName,
                                     subkeyCol=subkeyColumnName,
                                     startDate=startDate,
                                     endDate=endDate)

        self.logger.log('subkeys: {}'.format(mySubkeys), 'debug')

        def __initSumAndCount(subkey=None, sums=None, cnts=None):
            """
            Initialize the sum and cnt data structures.
            :param subkey: string
            :param sums: list | dict | None
            :param cnts: list | dict | None
            """

            if not sums and not cnts:
                sums = {}
                cnts = {}

            if not mySubkeys:
                sums = []
                cnts = []
                for i in range(len(self.columns[dataType].split(','))):
                    sums.append(0)
                    cnts.append(0)
            else:
                if not subkey:
                    for i in range(len(self.columns[dataType].split(','))):
                        for k in mySubkeys:
                            if k not in sums.keys():
                                sums[k] = []
                                cnts[k] = []
                            sums[k].append(0)
                            cnts[k].append(0)
                else:
                    sums[subkey] = []
                    for i in range(len(self.columns[dataType].split(','))):
                        sums[subkey].append(0)
                    cnts[subkey] = []
                    for i in range(len(self.columns[dataType].split(','))):
                        cnts[subkey].append(0)

            return (sums, cnts)

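        # Note: 'sum' shadows the built-in of the same name for the remainder
        # of this method.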
        (sum, cnt) = __initSumAndCount()

        def __initIntervalCrossings():
            """
            Perform initialization of the interval crossings used to
            determine when interval crossings occur.
            :return: None
            """

            subkeysToCheck = copy.copy(mySubkeys)
            self.logger.log('subkeys to check: {}'.format(subkeysToCheck),
                            'debug')

            if mySubkeys:
                for row in self.rawData(
                        dataType=dataType,
                        orderBy=[timeColumnName, subkeyColumnName],
                        timestampCol=timeColumnName,
                        startDate=startDate,
                        endDate=endDate):

                    # @CRITICAL: Exit after every subkey has been visited.
                    # This scans the raw data until each subkey is encountered
                    # ONCE and then exits.
                    if subkeysToCheck != []:
                        if row[ci(subkeyColumnName)] in subkeysToCheck:
                            subkeysToCheck.remove(row[ci(subkeyColumnName)])
                        minute = row[ci(
                            timeColumnName)].timetuple()[MINUTE_POSITION]

                        if minute <= 15:
                            self.nextMinuteCrossing[row[ci(
                                subkeyColumnName)]] = 15
                        elif minute <= 30:
                            self.nextMinuteCrossing[row[ci(
                                subkeyColumnName)]] = 30
                        elif minute <= 45:
                            self.nextMinuteCrossing[row[ci(
                                subkeyColumnName)]] = 45
                        elif minute == 0 or minute <= 59:
                            self.nextMinuteCrossing[row[ci(
                                subkeyColumnName)]] = 0
                        else:
                            raise Exception(
                                'Unable to determine next minute crossing')
                        self.logger.log(
                            'next min crossing for {} = {}'.format(
                                row[ci(subkeyColumnName)],
                                self.nextMinuteCrossing[row[ci(
                                    subkeyColumnName)]]), 'debug')
                    else:
                        break

            else:
                # Non-subkey case e.g. weather data.
                rowCnt = 0
                # @todo Optimize by querying only the first row.
                for row in self.rawData(dataType=dataType,
                                        orderBy=[timeColumnName],
                                        timestampCol=timeColumnName,
                                        startDate=startDate,
                                        endDate=endDate):
                    minute = row[ci(
                        timeColumnName)].timetuple()[MINUTE_POSITION]
                    if minute <= 15:
                        self.nextMinuteCrossingWithoutSubkeys = 15
                    elif minute <= 30:
                        self.nextMinuteCrossingWithoutSubkeys = 30
                    elif minute <= 45:
                        self.nextMinuteCrossingWithoutSubkeys = 45
                    elif minute == 0 or minute <= 59:
                        self.nextMinuteCrossingWithoutSubkeys = 0
                    else:
                        raise Exception(
                            'Unable to determine next minute crossing')
                    self.logger.log(
                        'next min crossing = {}'.format(
                            self.nextMinuteCrossingWithoutSubkeys), 'debug')
                    rowCnt += 1
                    if rowCnt > 0:
                        break

        __initIntervalCrossings()

        for row in self.rawData(dataType=dataType,
                                orderBy=[timeColumnName, subkeyColumnName],
                                timestampCol=timeColumnName,
                                startDate=startDate,
                                endDate=endDate):

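            # Accumulate sums and counts for every numeric column; when a
            # row's minute crosses a 15-minute boundary, emit the interval
            # averages and reset the accumulators (per subkey when subkeys
            # are present).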
            if mySubkeys:
                for col in self.columns[dataType].split(','):
                    if self.mathUtil.isNumber(
                            row[ci(col)]) and ci(col) != ci(subkeyColumnName):
                        sum[row[ci(subkeyColumnName)]][ci(col)] += row[ci(col)]
                        cnt[row[ci(subkeyColumnName)]][ci(col)] += 1

                minute = row[ci(timeColumnName)].timetuple()[MINUTE_POSITION]

                if self.intervalCrossed(minute=minute,
                                        subkey=row[ci(subkeyColumnName)]):
                    minuteCrossed = minute

                    # Perform aggregation on all of the previous data including
                    # the current data for the current subkey.
                    self.logger.log(
                        'key: {}'.format(row[ci(subkeyColumnName)]), 'debug')
                    aggData += [
                        self.intervalAverages(sum, cnt,
                                              row[ci(timeColumnName)],
                                              ci(timeColumnName),
                                              ci(subkeyColumnName),
                                              row[ci(subkeyColumnName)])
                    ]
                    self.logger.log('minute crossed {}'.format(minuteCrossed),
                                    'DEBUG')

                    # Init current sum and cnt for subkey that has a completed
                    # interval.
                    (sum,
                     cnt) = __initSumAndCount(subkey=row[ci(subkeyColumnName)],
                                              sums=sum,
                                              cnts=cnt)
            else:
                for col in self.columns[dataType].split(','):
                    if self.mathUtil.isNumber(row[ci(col)]):
                        sum[ci(col)] += row[ci(col)]
                        cnt[ci(col)] += 1

                minute = row[ci(timeColumnName)].timetuple()[MINUTE_POSITION]

                if self.intervalCrossed(minute=minute):
                    aggData += [
                        self.intervalAverages(sum, cnt,
                                              row[ci(timeColumnName)],
                                              ci(timeColumnName))
                    ]
                    (sum, cnt) = __initSumAndCount(subkey=None,
                                                   sums=sum,
                                                   cnts=cnt)

            rowCnt += 1

        self.logger.log('aggdata = {}'.format(aggData), 'debug')
        return MSGAggregatedData(aggregationType=aggregationType,
                                 columns=self.columns[dataType].split(','),
                                 data=aggData)
class NewDataAggregator(object):
    """
    Perform aggregation of new data for a set of predefined data types
    (self.rawTypes).
    """

    def __init__(self):
        """
        Constructor.
        """
        self.logger = SEKLogger(__name__, 'DEBUG')
        self.aggregator = MSGDataAggregator()
        self.notifier = MSGNotifier()
        self.rawTypes = [x.name for x in list(MSGAggregationTypes)]
        self.connector = MSGDBConnector()
        self.conn = self.connector.connectDB()
        self.cursor = self.conn.cursor()
        self.dbUtil = MSGDBUtil()


    def sendNewDataNotification(self, result = None, testing = False):
        """
        Send a notification reporting on new data that has become available
        since the last time new data was reported.

        :param result: list of dicts containing aggregation results as
        provided by MSGDataAggregator::aggregateNewData.
        :param testing: Use testing mode when True.
        """

        self.logger.log('result {}'.format(result), 'debug')

        lastReportDate = self.notifier.lastReportDate(
            MSGNotificationHistoryTypes.MSG_DATA_AGGREGATOR)

        if not lastReportDate:
            lastReportDate = "never"

        if not result:
            msgBody = '\nNew data has NOT been aggregated in {}. No result ' \
                      'was obtained. This is an error that should be ' \
                      'investigated.'.format(self.connector.dbName)
        else:
            msgBody = '\nNew data has been aggregated in {}.'.format(
                self.connector.dbName)
            msgBody += '\n\n'
            for i in range(len(result)):
                msgBody += 'The new data count for type {} is {} readings' \
                           '.\n'.format(result[i].keys()[0],
                                        result[i].values()[0])
            msgBody += '\n\n'
            msgBody += 'The last report date was %s.' % lastReportDate
            msgBody += '\n\n'
        self.notifier.sendNotificationEmail(msgBody, testing = testing)
        self.notifier.recordNotificationEvent(
            MSGNotificationHistoryTypes.MSG_DATA_AGGREGATOR)


    def aggregateNewData(self):
        """
        :return: list of dicts obtained from
        MSGDataAggregator::aggregateNewData.
        """

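        # Aggregate each raw data type in turn; each call returns a dict of
        # {dataType: count of newly aggregated rows}.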
        result = map(self.aggregator.aggregateNewData, self.rawTypes)

        self.logger.log('result {}'.format(result))
        return result
    patterns = ['*.xlsx']
    paths = []
    matchCnt = 0
    for root, dirs, filenames in os.walk('.'):
        for pat in patterns:
            for filename in fnmatch.filter(filenames, pat):
                paths.append(os.path.join(root, filename))
                matchCnt += 1

    table = 'PowerMeterEvents'

    cnt = 0
    workbookCount = 0

    for path in paths:
        workbookCount += 1
        logger.log('workbook: %s' % path)
        wb = xlrd.open_workbook(path)
        wb.sheet_names()
        print wb.sheet_names()
        sh = wb.sheet_by_index(0)
        print sh

        numRows = sh.nrows - 1

        currentRow = -1

        while currentRow < numRows:
            currentRow += 1
            row = sh.row(currentRow)

            # Row is a dict of the col vals.
        return "\n%s\n" % results
    return '\nNo previous retrieval results are available.\n'


def fileExists(fullPath):
    try:
        with open(fullPath):
            return True
    except IOError:
        return False


processCommandLineArguments()

if COMMAND_LINE_ARGS.testing:
    logger.log("Testing mode is ON.\n", 'info')
connector = MSGDBConnector(testing=COMMAND_LINE_ARGS.testing)
if COMMAND_LINE_ARGS.email:
    logger.log("Email will be sent.\n", 'info')

conn = connector.conn

databaseName = ''

if COMMAND_LINE_ARGS.testing:
    databaseName = configer.configOptionValue("Database", "testing_db_name")
else:
    databaseName = configer.configOptionValue("Database", "db_name")
class MECOXMLParser(object):
    """
    Parses XML for MECO data.
    """

    tableName = ""

    def __init__(self, testing=False):
        """
        Constructor.

        :param testing: (optional) Boolean indicating if Testing Mode is on.
        """

        self.logger = SEKLogger(__name__, "silent")

        if testing:
            self.logger.log("Testing Mode is ON.", "info")

        self.debug = False
        self.configer = MSGConfiger()
        if self.configer.configOptionValue("Debugging", "debug") == True:
            self.debug = True

        self.util = MSGDBUtil()
        self.mapper = MECOMapper()
        self.connector = MSGDBConnector(testing)
        self.conn = self.connector.connectDB()
        self.filename = None
        self.fileObject = None
        self.processForInsertElementCount = 0
        self.inserter = MECODBInserter()
        self.insertDataIntoDatabase = False

        # Count number of times sections in source data are encountered.
        self.tableNameCount = {
            "SSNExportDocument": 0,
            "MeterData": 0,
            "RegisterData": 0,
            "RegisterRead": 0,
            "Tier": 0,
            "Register": 0,
            "IntervalReadData": 0,
            "Interval": 0,
            "Reading": 0,
            "IntervalStatus": 0,
            "ChannelStatus": 0,
            "EventData": 0,
            "Event": 0,
        }

        # Use this dictionary to track which channels were processed when
        # readings are being processed. This is to prevent duplicate channel
        # data from being inserted.
        self.channelProcessed = {}

        self.initChannelProcessed()

        # Tables to be inserted to.
        self.insertTables = self.configer.insertTables

        self.lastSeqVal = None
        self.fKeyVal = None
        self.lastTable = None
        self.fkDeterminer = MECOFKDeterminer()
        self.dupeChecker = MECODupeChecker()
        self.currentMeterName = None
        self.currentIntervalEndTime = None
        self.currentRegisterReadReadTime = None
        self.dupesExist = False
        self.channelDupeExists = False  # For Reading dupes.
        self.numberDupeExists = False  # For Register dupes.
        self.eventTimeDupeExists = False  # For Event dupes.
        self.commitCount = 0
        self.readingDupeOnInsertCount = 0  # For Reading dupes.
        self.registerDupeOnInsertCount = 0  # For Register dupes.
        self.eventDupeOnInsertCount = 0  # For Event dupes.
        self.dataProcessCount = 0
        self.readingDupeCheckCount = 0  # For Reading dupes.
        self.registerDupeCheckCount = 0  # For Register dupes.
        self.eventDupeCheckCount = 0  # For Event dupes.
        self.insertCount = 0
        self.cumulativeInsertCount = 0
        self.nonProcessForInsertElementCount = 0
        self.readingInsertCount = 0
        self.registerInsertCount = 0
        self.eventInsertCount = 0
        self.totalReadingInsertCount = 0
        self.totalRegisterInsertCount = 0
        self.totalEventInsertCount = 0
        self.totalReadingDupeOnInsertCount = 0
        self.totalRegisterDupeOnInsertCount = 0
        self.totalEventDupeOnInsertCount = 0

    def parseXML(self, fileObject, insert=False, jobID=""):
        """
        Parse an XML file.

        :param fileObject: a file object referencing an XML file.
        :param insert: (optional) True to insert into the database; False to
        perform no inserts.
        :returns: String containing a concise log of parsing.
        """

        print "parseXML:"

        self.commitCount = 0
        self.insertDataIntoDatabase = insert

        parseMsg = "\nParsing XML in %s.\n" % self.filename
        sys.stderr.write(parseMsg)
        parseLog = parseMsg

        tree = ET.parse(fileObject)
        root = tree.getroot()

        parseLog += self.walkTheTreeFromRoot(root, jobID=jobID)

        return parseLog

    def tableNameForAnElement(self, element):
        """
        Get the tablename for an element.

        :param element: Element tree element.
        :returns: table name
        """

        try:
            name = re.search(r"\{.*\}(.*)", element.tag).group(1)
        except (AttributeError, TypeError):
            name = None
        return name

    def processDataToBeInserted(self, columnsAndValues, currentTableName, fKeyValue, parseLog, pkeyCol, jobID=""):
        """
        This is the method that performs insertion of parsed data to the
        database. Duplicate checks are performed on the endpoints of the data
         branches.

        :param columnsAndValues: A dictionary containing columns and their
        values.
        :param currentTableName: The name of the current table.
        :param fKeyValue: The value of the foreign key.
        :param parseLog: String containing a concise log of operations.
        :param pkeyCol: Column name for the primary key.
        :param jobID: Identifier for multiprocessing process.
        :returns: A string containing the parse log.
        """

        self.dataProcessCount += 1

        # Handle a special case for duplicate reading data.
        # Intercept the duplicate reading data before insert.
        if currentTableName == "Reading":
            self.channelDupeExists = self.dupeChecker.readingBranchDupeExists(
                self.conn, self.currentMeterName, self.currentIntervalEndTime, columnsAndValues["Channel"]
            )
            self.readingDupeCheckCount += 1

        if currentTableName == "Register":
            self.numberDupeExists = self.dupeChecker.registerBranchDupeExists(
                self.conn, self.currentMeterName, self.currentRegisterReadReadTime, columnsAndValues["Number"]
            )
            self.registerDupeCheckCount += 1

        if currentTableName == "Event":
            self.eventTimeDupeExists = self.dupeChecker.eventBranchDupeExists(
                self.conn, self.currentMeterName, columnsAndValues["EventTime"]
            )
            self.eventDupeCheckCount += 1

        # Only perform an insert if there are no duplicate values
        # for the channel.
        if not self.channelDupeExists and not self.numberDupeExists and not self.eventTimeDupeExists:

            # ***********************
            # ***** INSERT DATA *****
            # ***********************
            cur = self.inserter.insertData(
                self.conn, currentTableName, columnsAndValues, fKeyVal=fKeyValue, withoutCommit=1
            )
            # withoutCommit=1 defers the commit; commits are handled externally.
            self.insertCount += 1
            self.cumulativeInsertCount += 1

            # Only attempt getting the last sequence value if an insertion
            # took place.
            self.lastSeqVal = self.util.getLastSequenceID(self.conn, currentTableName, pkeyCol)
            # Store the primary key.
            self.fkDeterminer.pkValforCol[pkeyCol] = self.lastSeqVal

            if currentTableName == "Reading":
                self.readingInsertCount += 1
                self.totalReadingInsertCount += 1
            elif currentTableName == "Register":
                self.registerInsertCount += 1
                self.totalRegisterInsertCount += 1
            elif currentTableName == "Event":
                self.eventInsertCount += 1
                self.totalEventInsertCount += 1

        else:  # Don't insert into Reading or Register table if a dupe exists.

            if self.channelDupeExists:
                self.readingDupeOnInsertCount += 1
                self.totalReadingDupeOnInsertCount += 1
                if self.readingDupeOnInsertCount == 1:
                    parseLog += self.logger.logAndWrite("%s:{rd-dupe==>}" % jobID)

                # Also, verify the data is equivalent to the existing record.
                matchingValues = self.dupeChecker.readingValuesAreInTheDatabase(self.conn, columnsAndValues)
                assert matchingValues, (
                    "Duplicate check found non-matching values for meter %s, "
                    "end time %s, channel %s (%s, %s)."
                    % (
                        self.currentMeterName,
                        self.currentIntervalEndTime,
                        columnsAndValues["Channel"],
                        columnsAndValues["RawValue"],
                        columnsAndValues["Value"],
                    )
                )

                self.channelDupeExists = False

            elif self.numberDupeExists:
                self.registerDupeOnInsertCount += 1
                self.totalRegisterDupeOnInsertCount += 1
                if self.registerDupeOnInsertCount == 1:
                    parseLog += self.logger.logAndWrite("%s:{re-dupe==>}" % jobID)

                self.numberDupeExists = False

            elif self.eventTimeDupeExists:
                self.eventDupeOnInsertCount += 1
                self.totalEventDupeOnInsertCount += 1
                if self.eventDupeOnInsertCount == 1:
                    parseLog += self.logger.logAndWrite("%s:{ev-dupe==>}" % jobID)

                self.eventTimeDupeExists = False

            else:
                assert False, "Duplicate condition does not exist."

            self.logger.log("Record not inserted for %s." % columnsAndValues, "silent")

        return parseLog

    def generateConciseLogEntries(self, jobID="", reportType=None):
        """
        Create log entries in the concise log.

        :param jobID: Identifier used to distinguish multiprocessing jobs.
        :param reportType: "INTERMEDIARY" for per-group counts or "FINAL" for
        cumulative totals.
        :returns: A concatenated string of log entries.
        """

        # @todo Change report type to enum type.

        log = ""
        if reportType == "FINAL":
            self.logger.log("Final report", "info")

            if (
                self.readingDupeOnInsertCount > 0
                or self.registerDupeOnInsertCount > 0
                or self.eventDupeOnInsertCount > 0
            ):
                log = self.logger.logAndWrite(
                    "%s:{%srd,%sre,%sev}"
                    % (
                        jobID,
                        self.totalReadingDupeOnInsertCount,
                        self.totalRegisterDupeOnInsertCount,
                        self.totalEventDupeOnInsertCount,
                    )
                )
            else:
                log = ""
            log += self.logger.logAndWrite("(%s)" % self.commitCount)
            log += self.logger.logAndWrite("[%s]" % self.processForInsertElementCount)
            log += self.logger.logAndWrite(
                "<%srd,%sre,%sev,%s>"
                % (
                    self.totalReadingInsertCount,
                    self.totalRegisterInsertCount,
                    self.totalEventInsertCount,
                    self.cumulativeInsertCount,
                )
            )

        elif reportType == "INTERMEDIARY":

            if (
                self.readingDupeOnInsertCount > 0
                or self.registerDupeOnInsertCount > 0
                or self.eventDupeOnInsertCount > 0
            ):
                log = self.logger.logAndWrite(
                    "%s:{%srd,%sre,%sev}"
                    % (
                        jobID,
                        self.readingDupeOnInsertCount,
                        self.registerDupeOnInsertCount,
                        self.eventDupeOnInsertCount,
                    )
                )
            else:
                log = ""
            log += self.logger.logAndWrite("(%s)" % self.commitCount)
            log += self.logger.logAndWrite("[%s]" % self.processForInsertElementCount)
            log += self.logger.logAndWrite(
                "<%srd,%sre,%sev,%s,%s>"
                % (
                    self.readingInsertCount,
                    self.registerInsertCount,
                    self.eventInsertCount,
                    self.insertCount,
                    self.cumulativeInsertCount,
                )
            )
        return log

    def resetGroupCounters(self):
        """
        Reset counters that are keeping track of groups.
        """

        self.readingDupeOnInsertCount = 0
        self.insertCount = 0
        self.readingInsertCount = 0
        self.registerDupeOnInsertCount = 0
        self.registerInsertCount = 0
        self.eventInsertCount = 0
        self.eventDupeOnInsertCount = 0

    def performTableBasedOperations(self, columnsAndValues, currentTableName, element):
        """
        Perform operations that are based on the current table.

        :param columnsAndValues: Dictionary of column names and values for the
        current element.
        :param currentTableName: The name of the current table.
        :param element: The element tree element being processed.
        """

        if currentTableName == "MeterData":
            self.currentMeterName = columnsAndValues["MeterName"]

        elif currentTableName == "Interval":
            self.currentIntervalEndTime = columnsAndValues["EndTime"]

        elif currentTableName == "RegisterRead":
            self.currentRegisterReadReadTime = columnsAndValues["ReadTime"]

        elif currentTableName == "Event":
            columnsAndValues["Event_Content"] = element.text

    def walkTheTreeFromRoot(self, root, jobID=""):
        """
        Walk an XML tree from its root node.

        :param root: The root node of an XML tree.
        :param jobID: Identifier used to distinguish multiprocessing jobs.
        :returns: String containing a concise log of parsing activity.
        """

        parseLog = ""
        walker = root.iter()

        for element, nextElement in self.getNext(walker):
            # Process every element in the tree while reading ahead to get
            # the next element.

            currentTableName = self.tableNameForAnElement(element)
            nextTableName = self.tableNameForAnElement(nextElement)
            assert currentTableName is not None, "Current table does not exist."

            # Maintain a count of tables encountered.
            self.tableNameCount[currentTableName] += 1

            columnsAndValues = {}
            for attrName, attrValue in sorted(element.attrib.iteritems()):
                # Create a dictionary of column names and values.
                columnsAndValues[attrName] = attrValue

            if currentTableName in self.insertTables:
                # Check if the current table is one of the tables to have data
                # inserted.

                self.processForInsertElementCount += 1

                if self.debug:
                    self.logger.log("Processing table %s, next is %s." % (currentTableName, nextTableName), "debug")

                # Get the column name for the primary key.
                pkeyCol = self.mapper.dbColumnsForTable(currentTableName)["_pkey"]

                fkeyCol = None
                fKeyValue = None

                try:
                    # Get the column name for the foreign key.
                    fkeyCol = self.mapper.dbColumnsForTable(currentTableName)["_fkey"]
                except KeyError:
                    # Not all tables define a foreign key.
                    pass

                if self.debug:
                    self.logger.log("foreign key col (fkey) = %s" % fkeyCol, "debug")
                    self.logger.log("primary key col (pkey) = %s" % pkeyCol, "debug")
                    self.logger.log(columnsAndValues, "debug")

                if fkeyCol is not None:
                    # Get the foreign key value.
                    fKeyValue = self.fkDeterminer.pkValforCol[fkeyCol]

                if self.debug:
                    self.logger.log("fKeyValue = %s" % fKeyValue, "debug")

                self.performTableBasedOperations(columnsAndValues, currentTableName, element)

                if self.insertDataIntoDatabase:
                    # Data is intended to be inserted into the database.
                    parseLog = self.processDataToBeInserted(
                        columnsAndValues, currentTableName, fKeyValue, parseLog, pkeyCol, jobID=jobID
                    )

                if self.debug:
                    self.logger.log("lastSeqVal = ", self.lastSeqVal)

                if self.lastReading(currentTableName, nextTableName):
                    # The last reading set has been reached.

                    if self.debug:
                        self.logger.log("----- last reading found -----", "debug")

                    parseLog += self.generateConciseLogEntries(jobID=jobID, reportType="INTERMEDIARY")
                    self.resetGroupCounters()

                    parseLog += self.logger.logAndWrite("*")
                    self.commitCount += 1
                    self.conn.commit()

                if self.lastRegister(currentTableName, nextTableName):
                    # The last register set has been reached.

                    if self.debug:
                        self.logger.log("----- last register found -----", "debug")

        # Initial commit.
        if self.commitCount == 0:
            parseLog += self.generateConciseLogEntries(jobID=jobID, reportType="INTERMEDIARY")
        self.resetGroupCounters()

        # Final commit.
        parseLog += self.logger.logAndWrite("---")
        parseLog += self.generateConciseLogEntries(jobID=jobID, reportType="FINAL")
        self.resetGroupCounters()

        parseLog += self.logger.logAndWrite("*")
        self.commitCount += 1
        self.conn.commit()
        sys.stderr.write("\n")

        self.logger.log("Data process count = %s." % self.dataProcessCount, "info")
        self.logger.log("Reading dupe check count = %s." % self.readingDupeCheckCount, "info")
        return parseLog

    def lastReading(self, currentTable, nextTable):
        """
        Determine if the last reading is being visited.

        :param currentTable: Current table being processed.
        :param nextTable: Next table to be processed.
        :returns: True if the last object in the Reading table was read,
        otherwise False.
        """

        if currentTable == "Reading" and (nextTable == "MeterData" or nextTable == None):
            return True
        return False

    def lastRegister(self, currentTable, nextTable):
        """
        Determine if the last register is being visited.

        :param currentTable: Current table being processed.
        :param nextTable: Next table to be processed.
        :returns: True if the last object in the Register table was read,
        otherwise False.
        """

        if currentTable == "Register" and (nextTable == "MeterData" or nextTable == None):
            return True
        return False

    def getNext(self, somethingIterable, window=1):
        """
        Return the current item and next item in an iterable data structure.

        :param somethingIterable: Something that has an iterator.
        :param window: How far to look ahead in the collection.
        :returns: The current iterable value and the next iterable value.
        """

        items, nexts = tee(somethingIterable, 2)
        nexts = islice(nexts, window, None)
        return izip_longest(items, nexts)

    def initChannelProcessed(self):
        """
        Initialize the dictionary for channel processing.
        """

        self.channelProcessed = {"1": False, "2": False, "3": False, "4": False}

    def getLastElement(self, rows):
        """
        Get the last element in a collection.

        Example:
            rows = (element1, element2, element3)
            getLastElement(rows) # return element3

        :param rows: Result rows from a query.
        :returns: The last element in the collection.
        """

        for i, var in enumerate(rows):
            if i == len(rows) - 1:
                return var
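
# A minimal, standalone sketch (not part of the original example) of the
# read-ahead pattern used by getNext() above: tee the iterator, advance one
# copy by the window size, and zip the two so each item is paired with its
# successor (None follows the last item).
from itertools import tee, islice, izip_longest

def pairwiseLookahead(somethingIterable, window=1):
    items, nexts = tee(somethingIterable, 2)
    nexts = islice(nexts, window, None)
    return izip_longest(items, nexts)

# list(pairwiseLookahead(['a', 'b', 'c'])) == [('a', 'b'), ('b', 'c'), ('c', None)]
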
class MECODupeChecker(object):
    """
    Check for duplicate data in the database.
    """

    def __init__(self):
        """
        Constructor.
        """

        self.logger = SEKLogger(__name__, 'debug')
        self.mecoConfig = MSGConfiger()
        self.currentReadingID = 0
        self.dbUtil = MSGDBUtil()


    def getLastElement(self, rows):
        """
        Get the last element in a collection.

        Example:
            rows = (element1, element2, element3)
            getLastElement(rows) # return element3

        :param rows: Result rows from a query.
        :returns: The last element in the collection.
        """

        for i, var in enumerate(rows):
            if i == len(rows) - 1:
                return var

    def eventBranchDupeExists(self, conn, meterName, eventTime):
        """

        :param conn: Database connection.
        :param meterName: Meter name in MeterData table.
        :param eventTime: Timestamp of event.
        :return: True if tuple exists, False if not.
        """

        dbCursor = conn.cursor()

        sql = """SELECT "Event".event_time,
                        "MeterData".meter_data_id,
                        "EventData".event_data_id
                 FROM ( ( "MeterData" JOIN "EventData" ON (
                        ( "MeterData".meter_data_id = "EventData"
                        .meter_data_id ) ) )
                 JOIN "Event" ON ( ( "EventData".event_data_id = "Event"
                 .event_data_id ) ) )
                 WHERE "MeterData".meter_name = '%s'
                 AND "Event".event_time = '%s' """ % (meterName, eventTime)

        self.dbUtil.executeSQL(dbCursor, sql)
        rows = dbCursor.fetchall()

        if len(rows) > 0:
            return True
        else:
            return False


    def registerBranchDupeExists(self, conn, meterName, readTime,
                                 registerNumber, DEBUG = False):
        """
        Determine if a register branch duplicate exists for a given meter
        name, read time, number tuple.

        :param conn: Database connection.
        :param meterName: Meter name in MeterData table.
        :param readTime: Read time in RegisterRead table.
        :param registerNumber: Corresponds to DB column "number".
        :return: True if tuple exists, False if not.
        """

        dbCursor = conn.cursor()

        sql = """SELECT "public"."MeterData".meter_name,
                        "public"."RegisterRead".read_time,
                        "public"."Register"."number"
                 FROM "public"."MeterData"
                 INNER JOIN "public"."RegisterData" ON
                      "public" ."MeterData".meter_data_id = "public"
                      ."RegisterData".meter_data_id
                 INNER JOIN "public"."RegisterRead" ON
                      "public"."RegisterData" .register_data_id = "public"
                      ."RegisterRead".register_data_id
                 INNER JOIN "public"."Tier" ON "public"."RegisterRead"
                 .register_read_id = "public"."Tier" .register_read_id
                 INNER JOIN "public"."Register" ON "public"."Tier".tier_id =
                 "public"."Register".tier_id
                 WHERE "public"."MeterData".meter_name = '%s'
                 AND "public"."RegisterRead".read_time = '%s'
                 AND "public"."Register".number = '%s'
                 """ % (meterName, readTime, registerNumber)

        self.dbUtil.executeSQL(dbCursor, sql)
        rows = dbCursor.fetchall()

        if len(rows) > 0:
            return True
        else:
            return False


    def readingBranchDupeExists(self, conn, meterName, endTime, channel = None,
                                DEBUG = False):
        """
        Duplicate cases:
        1. Tuple (meterID, endTime) exists in the database.
        @DEPRECATED in favor of (2), full meterName-endTime-channel query.

        2. Tuple (meterID, endTime, channel) exists in the database.

        :param conn: Database connection.
        :param meterName: Meter name in MeterData table.
        :param endTime: End time in Interval table.
        :param channel: Required parameter that was previously optional. An
        optional channel is now deprecated.
        :return: True if tuple exists, False if not.
        """

        dbCursor = conn.cursor()

        if DEBUG:
            print "readingBranchDupeExists():"

        if channel is not None:
            sql = """SELECT	"Interval".end_time,
                            "MeterData".meter_name,
                            "MeterData".meter_data_id,
                            "Reading".channel,
                            "Reading".reading_id
                     FROM "MeterData"
                     INNER JOIN "IntervalReadData" ON "MeterData"
                     .meter_data_id =
                      "IntervalReadData".meter_data_id
                     INNER JOIN "Interval" ON "IntervalReadData"
                     .interval_read_data_id = "Interval".interval_read_data_id
                     INNER JOIN "Reading" ON "Interval".interval_id = "Reading"
                     .interval_id
                     WHERE "Interval".end_time = '%s' and meter_name = '%s' and
                     channel = '%s'""" % (
                endTime, meterName, channel)

        else:  # deprecated query
            sql = """SELECT	"Interval".end_time,
                            "MeterData".meter_name,
                            "MeterData".meter_data_id
                     FROM "MeterData"
                     INNER JOIN "IntervalReadData" ON "MeterData"
                     .meter_data_id =
                      "IntervalReadData".meter_data_id
                     INNER JOIN "Interval" ON "IntervalReadData"
                     .interval_read_data_id = "Interval".interval_read_data_id
                     WHERE "Interval".end_time = '%s' and meter_name =
                     '%s'""" % (
                endTime, meterName)

        self.dbUtil.executeSQL(dbCursor, sql)
        rows = dbCursor.fetchall()

        if len(rows) > 0:
            assert len(rows) < 2, \
                "Dupes should be less than 2, found %s: %s." % (len(rows), rows)

            self.currentReadingID = self.getLastElement(rows[0])
            self.logger.log('Reading ID = %s.' % self.currentReadingID,
                            'silent')

            self.logger.log(
                "Duplicate found for meter %s, end time %s, channel %s." % (
                    meterName, endTime, channel), 'silent')
            return True

        else:
            self.logger.log(
                "Found no rows for meter %s, end time %s, channel %s." % (
                    meterName, endTime, channel), 'silent')
            return False


    def readingValuesAreInTheDatabase(self, conn, readingDataDict):
        """
        Given the current reading ID, verify that the associated values are
        present in the database.

        Values are from the columns:
            1. channel
            2. raw_value
            3. uom
            4. value

        :param readingDataDict: Dictionary containing reading values.
        :returns: True if the existing values are the same, otherwise False.
        """

        if self.currentReadingID == 0:
            # No reading ID has been recorded; there is nothing to verify.
            return False

        dbCursor = conn.cursor()

        sql = """SELECT "Reading".reading_id,
                                "Reading".channel,
                                "Reading".raw_value,
                                "Reading".uom,
                                "Reading"."value"
                         FROM "Reading"
                         WHERE "Reading".reading_id = %s""" % (
            self.currentReadingID)

        self.dbUtil.executeSQL(dbCursor, sql)
        rows = dbCursor.fetchall()

        # assert len(rows) == 1 or len(rows) == 0
        assert len(
            rows) == 1, "Didn't find a matching reading for reading ID %s." %\
                        self.currentReadingID
        if len(rows) == 1:
            self.logger.log("Found %s existing matches." % len(rows), 'silent')

            allEqual = True
            if int(readingDataDict['Channel']) == int(rows[0][1]):
                print "channel equal,"
            else:
                self.logger.log("channel not equal: %s,%s,%s" % (
                    int(readingDataDict['Channel']), int(rows[0][1]),
                    readingDataDict['Channel'] == rows[0][1]), 'debug')
                allEqual = False

            if int(readingDataDict['RawValue']) == int(rows[0][2]):
                print "raw value equal,"
            else:
                self.logger.log("rawvalue not equal: %s,%s,%s" % (
                    int(readingDataDict['RawValue']), int(rows[0][2]),
                    readingDataDict['RawValue'] == rows[0][2]), 'debug')
                allEqual = False

            if readingDataDict['UOM'] == rows[0][3]:
                print "uom equal,"
            else:
                self.logger.log("uom not equal: %s,%s,%s" % (
                    readingDataDict['UOM'], rows[0][3],
                    readingDataDict['UOM'] == rows[0][3]), 'debug')
                allEqual = False

            if self.approximatelyEqual(float(readingDataDict['Value']),
                                       float(rows[0][4]), 0.001):
                self.logger.log("value equal", 'silent')
            else:
                self.logger.log("value not equal: %s,%s,%s" % (
                    float(readingDataDict['Value']), float(rows[0][4]),
                    readingDataDict['Value'] == rows[0][4]), 'debug')
                allEqual = False

            if allEqual:
                return True
            else:
                return False
        else:
            return False


    def approximatelyEqual(self, a, b, tolerance):
        """
        Determine whether two values are equal within a given tolerance.

        :param a: First value.
        :param b: Second value.
        :param tolerance: Maximum allowed absolute difference.
        :returns: True if the values are approximately equal.
        """

        return abs(a - b) < tolerance
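
# Illustrative note (not in the original): the duplicate checker treats
# floating-point reading values as equal when their absolute difference is
# below the given tolerance, e.g. approximatelyEqual(1.0001, 1.0002, 0.001)
# is True while approximatelyEqual(1.0, 1.01, 0.001) is False; this is how
# readingValuesAreInTheDatabase() compares the "value" column above.
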
class MSGConfiger(object):
    """
    Supports system-specific configuration for MECO data processing.
    The site-level configuration file is located in ~/.msg-data-operations.cfg.

    Usage:

    configer = MSGConfiger()

    """

    def __init__(self):
        """
        Constructor.
        """

        self._config = ConfigParser.ConfigParser()
        self.logger = SEKLogger(__name__, 'INFO')
        self.fileUtil = SEKFileUtil()

        # Define tables that will have data inserted. Data will only be inserted
        # to tables that are defined here.
        self.insertTables = (
            'MeterData', 'RegisterData', 'RegisterRead', 'Tier', 'Register',
            'IntervalReadData', 'Interval', 'Reading', 'EventData', 'Event')

        # Check permissions on the config file. Refuse to run if the permissions
        # are not set appropriately.

        configFilePath = '~/.msg-data-operations.cfg'

        if self.fileUtil.isMoreThanOwnerReadableAndWritable(
                os.path.expanduser(configFilePath)):
            self.logger.log(
                "Configuration file permissions are too permissive. Operation "
                "will not continue.", 'error')
            sys.exit()

        try:
            self._config.read(['site.cfg', os.path.expanduser(configFilePath)])
        except:
            self.logger.log("Critical error: The data in {} cannot be "
                            "accessed successfully.".format(configFilePath),
                            'ERROR')
            sys.exit(-1)


    def configOptionValue(self, section, option):
        """
        Get a configuration value from the local configuration file.
        :param section: String of section in config file.
        :param option: String of option in config file.
        :returns: The value contained in the configuration file.
        """

        try:
            configValue = self._config.get(section, option)
            if configValue == "True":
                return True
            elif configValue == "False":
                return False
            else:
                return configValue
        except:
            self.logger.log(
                "Failed when getting configuration option {} in section "
                "{}.".format(option, section), 'error')
            sys.exit(-1)
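
# Minimal usage sketch (assuming a readable ~/.msg-data-operations.cfg
# containing the referenced section and option; not part of the original
# example):
#
#     configer = MSGConfiger()
#     path = configer.configOptionValue('MECO Autoload',
#                                       'meco_autoload_new_data_path')
#
# Note that configOptionValue() converts the literal strings "True" and
# "False" to Python booleans and returns all other values as strings.
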
class TestMECONotifier(unittest.TestCase):
    """
    Unit tests for the MECO Notifier.
    """

    def setUp(self):
        self.logger = SEKLogger(__name__)
        self.notifier = MSGNotifier()
        self.configer = MSGConfiger()

    def tearDown(self):
        pass

    def testInit(self):
        self.assertIsNotNone(self.notifier, "Notifier has been initialized.")

    def testEmailServer(self):
        """
        Test connecting to the email server.
        """

        errorOccurred = False
        user = self.configer.configOptionValue("Notifications", "email_username")
        password = self.configer.configOptionValue("Notifications", "email_password")

        server = smtplib.SMTP(self.configer.configOptionValue("Notifications", "smtp_server_and_port"))

        try:
            server.starttls()
        except smtplib.SMTPException as detail:
            errorOccurred = True
            self.logger.log("Exception: {}".format(detail))

        try:
            server.login(user, password)
        except smtplib.SMTPException as detail:
            errorOccurred = True
            self.logger.log("Exception: {}".format(detail))

        self.assertFalse(errorOccurred, "No errors occurred during SMTP setup.")

    def testSendEmailNotification(self):
        """
        Send a test notification by email.
        """

        if SEND_EMAIL:
            success = self.notifier.sendNotificationEmail(
                "This is a message from testSendEmailNotification.", testing=True
            )
            self.assertTrue(success, "Sending an email notification did not produce an" " exception.")
        else:
            self.assertTrue(True, "Email is not sent when SEND_EMAIL is False.")

    def testSendEmailAttachment(self):
        """
        Send a test notification with attachment by email.
        """

        if SEND_EMAIL:
            body = "Test message"
            testDataPath = self.configer.configOptionValue("Testing", "test_data_path")
            file = os.path.join(testDataPath, "graph.png")
            success = self.notifier.sendMailWithAttachments(body, [file], testing=True)
            success = success != True
            self.assertTrue(success, "Sending an email notification did not produce an" " exception.")
        else:
            self.assertTrue(True, "Email is not sent when SEND_EMAIL is False.")
if __name__ == '__main__':
    processCommandLineArguments()
    tableBase = "MeterData"
    pkey = 'meter_id, time_utc'
    logger = SEKLogger(__name__, 'debug')
    configer = SIConfiger()
    dbUtil = SEKDBUtil()
    conn = SEKDBConnector(
        dbName = configer.configOptionValue('Database', 'db_name'),
        dbHost = configer.configOptionValue('Database', 'db_host'),
        dbPort = configer.configOptionValue('Database', 'db_port'),
        dbUsername = configer.configOptionValue('Database', 'db_username'),
        dbPassword = configer.configOptionValue('Database',
                                                'db_password')).connectDB()
    cursor = conn.cursor()

    tableOwner = configer.configOptionValue('Database', 'table_owner')
    for meterName in SIUtil().meters(basepath = COMMAND_LINE_ARGS.basepath):
        logger.log('creating table {}'.format(tableBase + "_" + meterName))
        sql = 'CREATE TABLE "{1}_{0}" ( CHECK ( meter_id = meter_id(\'{' \
              '0}\'))) INHERITS ("{1}"); ALTER TABLE ONLY "{1}_{0}" ADD ' \
              'CONSTRAINT "{1}_{0}_pkey" PRIMARY KEY ({3}); ALTER TABLE ONLY ' \
              '"{1}_{0}" ADD CONSTRAINT meter_id_fkey FOREIGN KEY (meter_id) ' \
              'REFERENCES "Meters"(meter_id) ON UPDATE CASCADE ON DELETE ' \
              'CASCADE; ALTER TABLE "{1}_{0}" OWNER TO {2}'.format(
            meterName, tableBase, tableOwner, pkey)
        if dbUtil.executeSQL(cursor, sql, exitOnFail = False):
            conn.commit()
        else:
            conn.rollback()
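
# Illustration (not in the original): for a hypothetical meter named "m100",
# with tableBase "MeterData", a hypothetical table owner "sepgroup" and the
# pkey defined above, the format string expands to SQL of the form
#
#     CREATE TABLE "MeterData_m100"
#         ( CHECK ( meter_id = meter_id('m100'))) INHERITS ("MeterData");
#     ALTER TABLE ONLY "MeterData_m100"
#         ADD CONSTRAINT "MeterData_m100_pkey" PRIMARY KEY (meter_id, time_utc);
#     ALTER TABLE ONLY "MeterData_m100"
#         ADD CONSTRAINT meter_id_fkey FOREIGN KEY (meter_id)
#         REFERENCES "Meters"(meter_id) ON UPDATE CASCADE ON DELETE CASCADE;
#     ALTER TABLE "MeterData_m100" OWNER TO sepgroup
#
# i.e. one child partition per meter, inheriting from "MeterData".
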
Esempio n. 42
0
class MECODBInserter(object):
    """
    Provides methods that perform insertion of MECO data.
    """
    def __init__(self):
        """
        Constructor.
        """

        self.logger = SEKLogger(__name__, 'debug')
        self.mapper = MECOMapper()
        self.dupeChecker = MECODupeChecker()
        self.dbUtil = MSGDBUtil()

    def __call__(self, param):
        print "CallableClass.__call__(%s)" % param

    def insertData(self,
                   conn,
                   tableName,
                   columnsAndValues,
                   fKeyVal=None,
                   withoutCommit=0):
        """
        Given a table name and a dictionary of column names and values,
        insert them to the DB.

        :param conn: database connection
        :param tableName: name of the db table
        :param columnsAndValues: dictionary of columns and values to be
        inserted to the db
        :param fKeyVal: (optional) an explicit foreign key value
        :param withoutCommit: (optional) a flag indicating that the insert
        will not be immediately committed
        :returns: A database cursor.
        """

        cur = conn.cursor()

        # Get a dictionary of mapped (from DB to source data) column names.
        columnDict = self.mapper.getDBColNameDict(tableName)

        dbColsAndVals = {}

        if VISUALIZE_DATA:
            print "----------" + tableName + "----------"
            print columnDict
            print columnsAndValues

        for col in columnDict.keys():

            # Use DEFAULT as the value for the primary key so that the
            # primary key is obtained from the predefined sequence.
            if col == '_pkey':
                if VISUALIZE_DATA:
                    print columnDict[col],  # DB col name.
                    print 'DEFAULT'
                dbColsAndVals[columnDict[col]] = 'DEFAULT'

            # For the foreign key, set the value from the given parameter.
            elif col == '_fkey':
                if VISUALIZE_DATA:
                    print columnDict[col],  # DB col name.
                    print fKeyVal
                dbColsAndVals[columnDict[col]] = fKeyVal

            else:
                if VISUALIZE_DATA:
                    print columnDict[col],  # DB col name.

                # The Register and Reading tables need to handle NULL
                # values as a special case.
                if tableName == 'Register' or tableName == 'Reading':
                    try:
                        if VISUALIZE_DATA:
                            print columnsAndValues[col]  # data source value
                        dbColsAndVals[columnDict[col]] = columnsAndValues[col]
                    except KeyError:
                        # The source data does not provide this column.
                        if VISUALIZE_DATA:
                            print 'NULL'
                        dbColsAndVals[columnDict[col]] = 'NULL'

                # For all other cases, simply pass the value.
                else:
                    if VISUALIZE_DATA:
                        print columnsAndValues[col]  # data source value
                    dbColsAndVals[columnDict[col]] = columnsAndValues[col]

        # Add a creation timestamp to MeterData.
        if tableName == 'MeterData':
            dbColsAndVals['created'] = 'NOW()'

        cols = []
        vals = []
        for col in dbColsAndVals.keys():
            cols.append(col)

            # DEFAULT, NULL and NOW() need to appear without quotes.
            if dbColsAndVals[col] in {'DEFAULT', 'NULL', 'NOW()'}:
                vals.append(dbColsAndVals[col])
            else:
                vals.append(
                    "'%s'" %
                    dbColsAndVals[col])  # Surround value with single quotes.

        sql = """INSERT INTO "%s" (%s) VALUES (%s)""" % (
            tableName, ','.join(cols), ','.join(vals))

        self.dbUtil.executeSQL(cur, sql)

        if withoutCommit == 0:
            try:
                conn.commit()
            except:
                self.logger.log("ERROR: Commit failed.", 'error')

        return cur
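
# Illustration (not in the original): for a hypothetical mapped row, the
# method above renders an INSERT of the form
#
#     INSERT INTO "MeterData" (created,meter_data_id,meter_name)
#     VALUES (NOW(),DEFAULT,'100000000')
#
# where DEFAULT, NULL and NOW() appear unquoted and every other value is
# wrapped in single quotes. The column names shown are hypothetical; the real
# names come from MECOMapper.getDBColNameDict().
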
class MSGWeatherDataUtil(object):
    """
    Utility methods for working with weather data.
    """
    def __init__(self):
        """
        Constructor.

        A database connection is not maintained here to keep this class
        lightweight.
        """

        self.logger = SEKLogger(__name__, DEBUG)
        self.configer = MSGConfiger()
        self.url = self.configer.configOptionValue('Weather Data',
                                                   'weather_data_url')
        self.pattern = self.configer.configOptionValue('Weather Data',
                                                       'weather_data_pattern')
        self.fileList = []
        self.dateList = []  # List of dates corresponding to weather data files.
        self.fillFileListAndDateList()
        self.dbUtil = MSGDBUtil()

    def fillFileListAndDateList(self):
        """
        Fill the file list and date list using weather file names obtained
        from the remote server used in processing weather data.
        """

        response = urllib2.urlopen(self.url).read()

        self.logger.log('Filling file list:', DEBUG)
        for filename in re.findall(self.pattern, response):
            # Only examine first match group in the filename match.
            self.logger.log('filename {}'.format(filename[0]), DEBUG)
            self.fileList.append(filename[0])
            self.dateList.append(self.datePart(filename[0]))

    def datePart(self, filename=None, datetime=None):
        """
        Return the date part of a NOAA weather data filename.

        :param filename: String of the filename.
        :param datetime: A datetime object.
        :returns: String of the date part of the given parameter.
        """

        assert filename == None or datetime == None, "One argument is allowed."
        if filename:
            newName = filename.replace("QCLCD", '')
            newName = newName.replace(".zip", '')
            return newName
        if datetime:
            return datetime.strftime('%Y-%m-%d')
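
    # Example (not in the original): a NOAA filename such as 'QCLCD201309.zip'
    # yields datePart(filename='QCLCD201309.zip') == '201309', while a datetime
    # argument is formatted as 'YYYY-MM-DD'.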

    def getLastDateLoaded(self, cursor):
        """
        Return the date of the most recently loaded weather data.

        :param cursor: A database cursor.
        :returns: The last loaded date.
        """

        sql = """select wban, datetime, record_type from "%s"
                 ORDER BY datetime desc limit 1""" % WEATHER_DATA_TABLE

        self.dbUtil.executeSQL(cursor, sql)
        row = cursor.fetchone()
        # self.logger.log('Date last loaded = %s' % row[1], 'info')
        return row[1]

    def getKeepList(self, fileList, cursor):
        """
        The Keep List is the list of filenames of files containing data that are
        *within* the month of the last loaded date or are beyond the last loaded
        date.

        :param fileList: A list of filenames of files containing weather data.
        :param cursor: A database cursor.
        :returns: List of weather data filenames to process.
        """

        keepList = []
        i = 0
        for date in fileList:
            self.logger.log('Examining date %s.' % date)

            # The list date should be the last day of the month.
            # It is the date that is compared against the last retrieved date.

            listDate = dt.datetime.strptime(self.datePart(filename=date),
                                            "%Y%m")
            lastDay = calendar.monthrange(listDate.year, listDate.month)[1]
            listDate = dt.datetime.strptime(
                '%s-%s-%s' % (listDate.year, listDate.month, lastDay),
                "%Y-%m-%d")
            self.logger.log('List date = %s.' % listDate)
            lastDate = self.getLastDateLoaded(cursor)

            self.logger.log('last date = %s' % lastDate)

            if lastDate <= listDate:
                keepList.append((i, listDate))

            i += 1

        if keepList:
            keepList.sort()

        return [fileList[d[0]] for d in keepList]
Esempio n. 44
0
class MSGTimeUtil(object):
    """
    Utilities for working with time.
    """
    def __init__(self):
        """
        Constructor.
        """

        self.logger = SEKLogger(__name__, 'debug')

    def reportOfDays(self, datetimes=None):
        """
        Return report of days processed given a set of days.

        :param datetimes: A set of datetimes.
        :returns: Report of processing as a string.
        """

        # @todo Verify datetimes is a Set.

        # self.logger.log("datetimes = %s" % datetimes, 'debug')

        if datetimes is None:
            return "No days processed."

        myDates = set()
        for day in datetimes:
            self.logger.log('Processing day %s.' % day)
            myDates.add(day.date())

        datetimeList = list(myDates)
        datetimeList.sort()

        countOfDays = len(datetimeList)
        firstDay = datetimeList[0]
        lastDay = datetimeList[len(datetimeList) - 1]

        if countOfDays == 1:
            return "Processed 1 day with date %s." % (firstDay)
        else:
            return "Processed %s days between %s to %s, inclusive." % (
                countOfDays, firstDay, lastDay)

    def conciseNow(self):
        """
        Returns the current date and time in a concise format.
        """

        return dt.now().strftime('%Y-%m-%d_%H%M%S')

    def splitStringDates(self, startDate='', endDate=''):
        """
        Break down two dates into a list containing the start and end dates
        for each month within the range.

        :param startDate: string
        :param endDate: string
        :return: List of tuples.
        """

        # self.logger.log('start,end: %s,%s' % (startDate, endDate))

        myDatetime = lambda x: dt.strptime(x, '%Y-%m-%d')
        firstDay = lambda x: dt.strptime(x.strftime('%Y-%m-01'), '%Y-%m-%d')
        startDates = map(
            firstDay,
            list(
                rrule.rrule(rrule.MONTHLY,
                            dtstart=myDatetime(startDate),
                            until=myDatetime(endDate))))
        startDates[0] = myDatetime(startDate)
        lastDay = lambda x: dt.strptime(
            '%d-%d-%d' % (x.year, x.month, calendar.monthrange(
                x.year, x.month)[1]), '%Y-%m-%d')
        endDates = map(lastDay, startDates)
        endDates[-1] = myDatetime(endDate)
        assert len(startDates) == len(
            endDates), 'Mismatch of start and end dates.'
        return zip(startDates, endDates)
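
    # Example (not in the original):
    #
    #     splitStringDates(startDate='2014-01-01', endDate='2014-03-31')
    #
    # returns one (start, end) datetime pair per month, with the first start
    # and the last end clamped to the given dates:
    #
    #     [(2014-01-01, 2014-01-31),
    #      (2014-02-01, 2014-02-28),
    #      (2014-03-01, 2014-03-31)]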

    def splitDates(self, start=None, end=None):
        """
        Break down two dates into a list containing the start and end dates
        for each month within the range.

        :param start: datetime
        :param end: datetime
        :return: List of tuples.
        """

        self.logger.log('start {}, end {}'.format(start, end), 'debug')

        # First day of the month.
        firstDay = lambda x: dt.strptime(x.strftime('%Y-%m-01'), '%Y-%m-%d')
        startDates = map(
            firstDay,
            list(rrule.rrule(rrule.MONTHLY, dtstart=firstDay(start),
                             until=end)))

        # @todo add assert for verifying sorted start dates.
        startDates[0] = start

        lastDay = lambda x: dt.strptime(
            '%d-%d-%d' % (x.year, x.month, calendar.monthrange(
                x.year, x.month)[1]), '%Y-%m-%d')
        endDates = map(lastDay, startDates)
        endDates[-1] = end

        assert len(startDates) == len(
            endDates), 'Mismatch of start and end dates.'
        return zip(startDates, endDates)

    def datetimeForString(self, datetimeString):
        """
        :param datetimeString: String
        :return: datetime parsed from the string using the '%Y-%m-%d %H:%S' format.
        """
        return dt.strptime(datetimeString, '%Y-%m-%d %H:%S')
class MSGDataAggregator(object):
    """
    Use for continuous data aggregation of diverse data types relevant to the
    Maui Smart Grid project.

    Four data types are supported:

    1. Irradiance
    2. Temperature/Humidity (weather)
    3. Circuit
    4. eGauge

    The general data form conforms to

    1. timestamp, subkey_id, val1, val2, val3, ...
    2. timestamp, val1, val2, val3, ...

    Case (2) is handled within the same space as (1) by testing for the
    existence of subkeys.

    Current aggregation consists of averaging over **15-min intervals**.

    Aggregation is performed in-memory and saved to the DB. The time range is
    delimited by start date and end date where the values are included in the
    range. The timestamps for aggregation intervals are the last timestamp in a
    respective series.

    * Aggregation subkeys are values such as eGauge IDs or circuit numbers.

    Aggregation is being implemented externally for performance and flexibility
    advantages over alternative approaches such as creating a view. It may be
    rolled into an internal function at future time if that proves to be
    beneficial.

    Usage:

        from msg_data_aggregator import MSGDataAggregator
        aggregator = MSGDataAggregator()

    API:

        aggregateAllData(dataType = dataType)

        aggregateNewData(dataType = dataType)

    """

    def __init__(self, exitOnError=True, commitOnEveryInsert=False, testing=False):
        """
        Constructor.

        :param exitOnError: if True, raise an exception when an aggregated
        data insert fails.
        :param commitOnEveryInsert: if True, commit after every insert; used
        for the special case where data is reloaded.
        :param testing: if True, the testing DB will be connected instead of
        the production DB.
        """

        self.logger = SEKLogger(__name__, "info")
        self.configer = MSGConfiger()
        self.conn = MSGDBConnector().connectDB()
        self.cursor = self.conn.cursor()
        self.dbUtil = MSGDBUtil()
        self.notifier = MSGNotifier()
        self.mathUtil = MSGMathUtil()
        self.timeUtil = MSGTimeUtil()
        self.nextMinuteCrossing = {}
        self.nextMinuteCrossingWithoutSubkeys = None
        self.exitOnError = exitOnError
        self.commitOnEveryInsert = commitOnEveryInsert
        section = "Aggregation"
        tableList = [
            "irradiance",
            "agg_irradiance",
            "weather",
            "agg_weather",
            "circuit",
            "agg_circuit",
            "egauge",
            "agg_egauge",
        ]
        self.dataParams = {
            "weather": ("agg_weather", "timestamp", ""),
            "egauge": ("agg_egauge", "datetime", "egauge_id"),
            "circuit": ("agg_circuit", "timestamp", "circuit"),
            "irradiance": ("agg_irradiance", "timestamp", "sensor_id"),
        }
        self.columns = {}

        # tables[datatype] gives the table name for datatype.
        self.tables = {t: self.configer.configOptionValue(section, "{}_table".format(t)) for t in tableList}

        for t in self.tables.keys():
            self.logger.log("t:{}".format(t), "DEBUG")
            try:
                self.columns[t] = self.dbUtil.columnsString(self.cursor, self.tables[t])
            except TypeError as error:
                self.logger.log("Ignoring missing table: Error is {}.".format(error), "error")

    def existingIntervals(self, aggDataType="", timeColumnName=""):
        """
        Retrieve the existing aggregation intervals for the given data type.

        :param aggDataType: string
        :param timeColumnName: string
        :return: List of intervals.
        """

        return [
            x[0]
            for x in self.rows(
                """SELECT {0} from \"{1}\" ORDER BY {2}""".format(
                    timeColumnName, self.tables[aggDataType], timeColumnName
                )
            )
        ]

    def unaggregatedIntervalCount(self, dataType="", aggDataType="", timeColumnName="", idColumnName=""):
        """
        Return count of unaggregated intervals for a given data type.
        :param dataType:
        :param aggDataType:
        :param timeColumnName:
        :param idColumnName:
        :return: int
        """

        return len(self.unaggregatedEndpoints(dataType, aggDataType, timeColumnName, idColumnName))

    def lastAggregationEndpoint(self, aggDataType="", timeColumnName=""):
        """
        Last aggregation endpoint for a given datatype.

        :param aggDataType:
        :param timeColumnName:
        :return:
        """

        return self.existingIntervals(aggDataType=aggDataType, timeColumnName=timeColumnName)[-1]

    def unaggregatedEndpoints(self, dataType="", aggDataType="", timeColumnName="", idColumnName=""):
        """
        Sorted (ascending) endpoints and their IDs, if available,
        for unaggregated intervals since the last aggregation endpoint for a
        given data type.

        This has a problem where an endpoint at 23:45:04 will be returned as
        23:45:00. This makes the return value incorrect for raw data types
        having readings at sub-minute intervals such as data for circuit,
        irradiance and weather. This condition does not affect correct
        aggregation. Only the definition of the return value is wrong.

        :param dataType: string
        :param aggDataType: string
        :param timeColumnName: string
        :param idColumnName: string
        :return: list of datetimes.
        """

        if idColumnName != "":
            # Key:
            # 0: raw
            # 1: agg
            # 2: time col
            # 3: id col
            # 4: last aggregated time
            sql = (
                'SELECT "{0}".{2}, "{0}".{3} FROM "{0}" LEFT JOIN "{1}" ON '
                '"{0}".{2} = "{1}".{2} AND "{0}".{3} = "{1}".{3} WHERE "{'
                '1}".{2} IS NULL AND "{0}".{2} > \'{4}\' ORDER BY {2} ASC, '
                "{3} ASC"
            )

            self.logger.log("last agg endpoint: {}".format(self.lastAggregationEndpoint(aggDataType, timeColumnName)))

            # The id column value is available in the tuple returned by
            # groupby but is not being used here.

            # @todo Exclude last endpoint if it is equal to the last
            # aggregation endpoint.
            #
            # The minute position filtering may be including the last
            # endpoint incorrectly because there are readings occurring
            # within the same minute as the final endpoint, e.g. 23:45:04,
            # 23:45:08, etc.
            #
            # This is not a problem with eGauge data due to reading intervals
            # being every minute and zero seconds.

            return map(
                lambda x: datetime(x[0], x[1], x[2], x[3], x[4], 0),
                [
                    k
                    for k, v in groupby(
                        map(
                            lambda y: y[0].timetuple()[0:5],
                            filter(
                                lambda x: x[0].timetuple()[MINUTE_POSITION] % INTERVAL_DURATION == 0,
                                [
                                    (x[0], x[1])
                                    for x in self.rows(
                                        sql.format(
                                            self.tables[dataType],
                                            self.tables[aggDataType],
                                            timeColumnName,
                                            idColumnName,
                                            self.lastAggregationEndpoint(aggDataType, timeColumnName),
                                        )
                                    )
                                ],
                            ),
                        )
                    )
                ],
            )
        else:
            # Key:
            # 0: raw
            # 1: agg
            # 2: time col
            # 3: last aggregated time
            sql = (
                'SELECT "{0}".{2} FROM "{0}" LEFT JOIN "{1}" ON "{0}".{2}='
                '"{1}".{2} WHERE "{1}".{2} IS NULL AND "{0}".{2} > \'{3}\' '
                "ORDER BY {2} ASC"
            )

            self.logger.log("last agg endpoint: {}".format(self.lastAggregationEndpoint(aggDataType, timeColumnName)))

            return map(
                lambda x: datetime(x[0], x[1], x[2], x[3], x[4], 0),
                [
                    k
                    for k, v in groupby(
                        map(
                            lambda y: y.timetuple()[0:5],
                            filter(
                                lambda x: x.timetuple()[MINUTE_POSITION] % INTERVAL_DURATION == 0,
                                [
                                    (x[0])
                                    for x in self.rows(
                                        sql.format(
                                            self.tables[dataType],
                                            self.tables[aggDataType],
                                            timeColumnName,
                                            self.lastAggregationEndpoint(aggDataType, timeColumnName),
                                        )
                                    )
                                ],
                            ),
                        )
                    )
                ],
            )
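
    # Reading guide (not in the original) for the pipelines above: the raw
    # rows selected by the LEFT JOIN are filtered to timestamps that fall on
    # an aggregation boundary (minute % INTERVAL_DURATION == 0), truncated to
    # (year, month, day, hour, minute), deduplicated with groupby(), and
    # finally rebuilt as datetime objects with seconds forced to zero.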

    def intervalCrossed(self, minute=None, subkey=None):
        """
        Determine interval crossing. Intervals are at 0, 15, 30, and 45 min.
        The interval size is determined by MECO source data.

        :param minute: The integer value of the minute.
        :param subkey: The name for the subkey used for aggregation.
        :returns: True if an interval was crossed, False otherwise.
        """

        if not minute and minute != 0:
            raise Exception("Minute not defined.")

        intervalSize = 15
        first = 0
        last = 60

        if subkey is not None:
            if (
                minute >= self.nextMinuteCrossing[subkey]
                and minute <= last
                and self.nextMinuteCrossing[subkey] != first
            ):
                self.nextMinuteCrossing[subkey] += intervalSize
                if self.nextMinuteCrossing[subkey] >= last:
                    self.nextMinuteCrossing[subkey] = first
                self.logger.log("minute crossed at #1.", "debug")
                return True
            elif self.nextMinuteCrossing[subkey] == first and minute >= first and minute <= intervalSize:
                self.nextMinuteCrossing[subkey] = intervalSize
                self.logger.log("minute crossed at #2.", "debug")
                return True
            return False
        else:
            if (
                minute >= self.nextMinuteCrossingWithoutSubkeys
                and minute <= last
                and self.nextMinuteCrossingWithoutSubkeys != first
            ):
                self.nextMinuteCrossingWithoutSubkeys += intervalSize
                if self.nextMinuteCrossingWithoutSubkeys >= last:
                    self.nextMinuteCrossingWithoutSubkeys = first
                self.logger.log("minute crossed at #3.", "debug")
                return True
            elif self.nextMinuteCrossingWithoutSubkeys == first and minute >= first and minute <= intervalSize:
                self.nextMinuteCrossingWithoutSubkeys = intervalSize
                self.logger.log("minute crossed at #4.", "debug")
                return True
            return False
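
    # Example (not in the original): with nextMinuteCrossing at 15 for a
    # subkey, a reading at minute 17 reports a crossing and advances the next
    # crossing to 30; a subsequent reading at minute 22 does not cross.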

    def rows(self, sql):
        """
        Rows from a SQL fetch.

        :param sql: Command to be executed.
        :returns: DB result set.
        """

        self.logger.log("sql: {}".format(sql), "debug")
        self.dbUtil.executeSQL(self.cursor, sql)
        return self.cursor.fetchall()

    def rawData(self, dataType="", orderBy=None, timestampCol="", startDate="", endDate=""):
        """
        Raw data to be aggregated.

        :param dataType: string
        :param orderBy: list
        :param timestampCol: string
        :param startDate: string
        :param endDate: string
        :returns: DB rows.
        """

        # @todo Validate args.

        orderBy = filter(None, orderBy)

        return self.rows(
            """SELECT {} FROM "{}" WHERE {} BETWEEN '{}' AND
        '{}' ORDER BY {}""".format(
                self.columns[dataType], self.tables[dataType], timestampCol, startDate, endDate, ",".join(orderBy)
            )
        )

    def subkeys(self, dataType="", timestampCol="", subkeyCol="", startDate="", endDate=""):
        """
        The distinct subkeys for a given data type within a time range.

        Subkeys are fields such as egauge_id in eGauge data or sensor_id in
        irradiance data.

        :param dataType: string
        :param timestampCol: string
        :param subkeyCol: string
        :param startDate: string
        :param endDate: string
        :returns: List of subkeys
        """

        return [
            sk[0]
            for sk in self.rows(
                """SELECT DISTINCT({}) FROM "{}"
        WHERE {} BETWEEN '{}' AND '{}'
            ORDER BY {}""".format(
                    subkeyCol, self.tables[dataType], timestampCol, startDate, endDate, subkeyCol
                )
            )
        ]

    def insertAggregatedData(self, agg=None):
        """
        :param agg: MSGAggregatedData
        :return: None
        """

        if not agg.columns:
            raise Exception("agg columns not defined.")
        if not agg.data:
            raise Exception("agg data not defined.")

        self.logger.log("agg data: {}".format(agg.data))
        self.logger.log("agg data type: {}".format(type(agg.data)))

        def __insertData(values=""):
            """
            Perform insert of data to the database using the given values.
            :param values: String containing values to be inserted.
            :return Nothing.
            """
            sql = 'INSERT INTO "{0}" ({1}) VALUES( {2})'.format(
                self.tables[agg.aggregationType], ",".join(agg.columns), values
            )
            self.logger.log("sql: {}".format(sql), "debug")
            success = self.dbUtil.executeSQL(self.cursor, sql, exitOnFail=self.exitOnError)

            # Used for a special case where data is reloaded.
            if self.commitOnEveryInsert:
                self.conn.commit()
            if not success and self.exitOnError:
                raise Exception("Failure during aggregated data insert.")

        for row in agg.data:
            if isinstance(row, dict):
                # self.logger.log('row=%s' % row, 'debug')
                # self.logger.log('row type: %s' % type(row))

                for key in row.keys():
                    values = ""
                    valCnt = 0
                    for val in row[key]:
                        if val == "NULL":
                            values += val
                        elif type(val) == type(""):
                            values += "'" + val.strip() + "'"
                        elif isinstance(val, datetime):
                            values += "'" + val.isoformat() + "'"
                        elif type(val) == type(0):
                            values += str(val)
                        elif type(val) == type(0.0):
                            values += str(val)
                        else:
                            values += val
                        if valCnt < len(agg.columns) - 1:
                            values += ","
                        valCnt += 1
                    __insertData(values=values)

            elif type(row) == type([]):
                values = ""
                valCnt = 0
                for val in row:
                    if val == "NULL":
                        values += val
                    elif type(val) == type(""):
                        values += "'" + val.strip() + "'"
                    elif isinstance(val, datetime):
                        values += "'" + val.isoformat() + "'"
                    elif type(val) == type(0):
                        values += str(val)
                    elif type(val) == type(0.0):
                        values += str(val)
                    else:
                        values += val
                    if valCnt < len(agg.columns) - 1:
                        values += ","
                    valCnt += 1
                __insertData(values=values)
            else:
                self.logger.log("row = {}".format(row), "error")
                raise Exception("Row type not matched.")

        # End for row.
        self.conn.commit()
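
    # Worked example with a hypothetical row, not taken from real data: for
    # agg.columns = ['timestamp', 'egauge_id', 'use_kw'] and the list-type row
    # [datetime(2014, 1, 1, 0, 15), 42, 1.5], the loop above builds
    #
    #   values = "'2014-01-01T00:15:00',42,1.5"
    #
    # and __insertData() then executes
    #
    #   INSERT INTO "<aggregated table>" (timestamp,egauge_id,use_kw)
    #   VALUES( '2014-01-01T00:15:00',42,1.5)
    #
    # A 'NULL' entry in a row is passed through unquoted so that SQL NULL is
    # inserted.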

    def intervalAverages(self, sums, cnts, timestamp, timestampIndex, subkeyIndex=None, subkey=None):
        """
        Aggregates all data for the current interval for the given subkey.

        For the case where there are no subkeys, subkeyIndex and subkey
        should be None.

        :param sums: list
        :param cnts: list
        :param timestamp: datetime
        :param timestampIndex: int
        :param subkeyIndex: int
        :param subkey: string
        :returns: Averaged data as a dict with form {subkey:data}
        """

        if subkey is not None:
            myAvgs = {}
            reportedAgg = False
            myAvgs[subkey] = []
            sumIndex = 0

            self.logger.log("key: {}".format(subkey), "debug")
            # Iterate over sums.
            for s in sums[subkey]:
                if sumIndex == timestampIndex:
                    myAvgs[subkey].append(timestamp)
                elif sumIndex == subkeyIndex:
                    myAvgs[subkey].append(subkey)
                else:
                    if cnts[subkey][sumIndex] != 0:
                        if not reportedAgg:
                            self.logger.log("Aggregating {} rows of data.".format(cnts[subkey][sumIndex]), "debug")
                            reportedAgg = True

                        myAvgs[subkey].append(s / cnts[subkey][sumIndex])
                    else:
                        myAvgs[subkey].append("NULL")
                sumIndex += 1
            return myAvgs
        else:
            myAvgs = []
            reportedAgg = False
            sumIndex = 0
            for s in sums:
                if sumIndex == timestampIndex:
                    myAvgs.append(timestamp)
                else:
                    if cnts[sumIndex] != 0:
                        if not reportedAgg:
                            self.logger.log("Aggregating {} rows of data.".format(cnts[sumIndex]), "debug")
                            reportedAgg = True
                        myAvgs.append(s / cnts[sumIndex])
                    else:
                        myAvgs.append("NULL")
                sumIndex += 1
            return myAvgs
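
    # Worked example for the no-subkey case with hypothetical numbers: given
    # sums = [None, 30.0, 0], cnts = [0, 3, 0], timestampIndex = 0 and
    # timestamp = datetime(2014, 1, 1, 0, 15), the method returns
    # [datetime(2014, 1, 1, 0, 15), 10.0, 'NULL']: the timestamp slot is
    # replaced by the interval timestamp, populated columns are averaged
    # (30.0 / 3) and columns with a zero count become 'NULL'.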

    def dataParameters(self, dataType=""):
        """
        Parameters for a given data type.
        :param dataType: string
        :return: (aggType, timeColName, subkeyColName)
        """
        try:
            assert len(self.dataParams[dataType]) == 3
            return self.dataParams[dataType]
        except (KeyError, AssertionError):
            self.logger.log("Unmatched data type {}.".format(dataType), "error")

    def aggregateAllData(self, dataType=""):
        """
        Convenience method for aggregating all data for a given data type.
        Data is inserted to individual aggregated data tables.
        :param dataType: String in the list of raw data types.
        :return: Nothing.
        """
        (aggType, timeColName, subkeyColName) = self.dataParameters(dataType)

        for start, end in self.monthStartsAndEnds(timeColumnName=timeColName, dataType=dataType):
            self.logger.log("start, end: {}, {}".format(start, end))
            aggData = self.aggregatedData(
                dataType=dataType,
                aggregationType=aggType,
                timeColumnName=timeColName,
                subkeyColumnName=subkeyColName,
                startDate=start.strftime("%Y-%m-%d %H:%M:%S"),
                endDate=end.strftime("%Y-%m-%d %H:%M:%S"),
            )
            self.insertAggregatedData(agg=aggData)
            for row in aggData.data:
                self.logger.log("aggData row: {}".format(row))

    def aggregateNewData(self, dataType=""):
        """
        Convenience method for aggregating new data.

        :param dataType:
        :return: dict of {dataType: count of aggregation endpoints}
        """

        # The new aggregation starting point is equal to the last aggregation
        # endpoint up to the last unaggregated endpoint.

        (aggType, timeColName, subkeyColName) = self.dataParameters(dataType)

        (end, start) = self.lastUnaggregatedAndAggregatedEndpoints(dataType).items()[0][1]

        self.logger.log(
            "datatype: {}; start, end: {}, {}; end type: {}".format(dataType, start, end, type(end)), "critical"
        )

        if end is None:
            # When there are no unaggregated endpoints (as happens for the
            # egauge type when nothing new is available), end is None.
            # Fractional-minute readings for the other types are not yet
            # handled completely, but this method still works correctly for
            # them.
            self.logger.log("Nothing to aggregate.")
            return {dataType: 0}

        if self.incrementEndpoint(start) >= end:
            self.logger.log("Nothing to aggregate.")
            return {dataType: 0}

        aggData = self.aggregatedData(
            dataType=dataType,
            aggregationType=aggType,
            timeColumnName=timeColName,
            subkeyColumnName=subkeyColName,
            startDate=self.incrementEndpoint(start).strftime("%Y-%m-%d %H:%M:%S"),
            endDate=end.strftime("%Y-%m-%d %H:%M:%S"),
        )
        self.insertAggregatedData(agg=aggData)
        for row in aggData.data:
            self.logger.log("aggData row: {}".format(row))

        self.logger.log("{} rows aggregated for {}.".format(len(aggData.data), dataType))
        return {dataType: len(aggData.data)}
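
    # Illustrative sketch with hypothetical timestamps: if
    # lastUnaggregatedAndAggregatedEndpoints() returns
    # {'egauge': (datetime(2014, 1, 1, 3, 0), datetime(2014, 1, 1, 1, 0))},
    # then end = 03:00 (last unaggregated endpoint) and start = 01:00 (last
    # aggregated endpoint), so the new aggregation covers
    # incrementEndpoint(start) = 01:15 through end = 03:00.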

    def incrementEndpoint(self, endpoint=None):
        """
        Increment an endpoint by one interval where endpoints are the final
        timestamp in an aggregation interval.
        :param endpoint: the endpoint to be incremented.
        :return: datetime object that is the given endpoint plus a predefined
        number of minutes.
        """
        plusOneInterval = relativedelta(minutes=15)
        return endpoint + plusOneInterval

    def lastUnaggregatedAndAggregatedEndpoints(self, dataType=""):
        """
        Return the endpoints for the given data type in the form

        {datatype: (last unaggregated endpoint, last aggregated endpoint)}.
        :param dataType:
        :return: dict with tuple.
        """
        self.logger.log("datatype {}".format(dataType))
        (aggType, timeColName, subkeyColName) = self.dataParameters(dataType)
        self.logger.log("subkey colname {}".format(subkeyColName))

        unAggregatedEndpoints = self.unaggregatedEndpoints(
            dataType=dataType, aggDataType=aggType, timeColumnName=timeColName, idColumnName=subkeyColName
        )

        self.logger.log("unagg endpoints: {}".format(unAggregatedEndpoints))
        return {
            dataType: (
                unAggregatedEndpoints[-1] if unAggregatedEndpoints != [] else None,
                self.lastAggregationEndpoint(aggDataType=aggType, timeColumnName=timeColName),
            )
        }

    def aggregatedVsNewData(self):
        """
        Convenience method.
        :return: dict of tuples containing {datatype:(last raw datetime,
        last agg datetime)}
        """
        return {
            x.keys()[0]: (x.values()[0])
            for x in map(self.lastUnaggregatedAndAggregatedEndpoints, [k for k in self.dataParams])
        }

    def monthStartsAndEnds(self, timeColumnName="", dataType=""):
        """
        Return first date and last date for the given **raw** data type for each
        month in the data's entire time range.

        The end date is incremented by one aggregation period to account for
        the data obtained at time 00:00.

        :param timeColumnName: string
        :param dataType: string
        :return: List of tuples.
        """

        self.logger.log("datatype {}".format(dataType), "debug")
        (start, end) = self.rows(
            """SELECT MIN({}), MAX({}) FROM \"{}\"""".format(timeColumnName, timeColumnName, self.tables[dataType])
        )[0]
        self.logger.log("start {}".format(start))
        self.logger.log("end {}".format(end))

        # The end time of each split date is extended to 23:59:59 so that the
        # full final day is covered.

        splitDates = self.timeUtil.splitDates(start, end)

        startEndDatesTransform = []
        for startDay, endDay in splitDates:
            endOfDay = datetime(endDay.year, endDay.month, endDay.day,
                                23, 59, 59)
            startEndDatesTransform.append(
                (startDay, self.incrementEndpoint(endOfDay)))

        return startEndDatesTransform
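
    # Illustrative sketch with hypothetical dates: a split date pair of
    # (datetime(2014, 1, 1, 0, 0), datetime(2014, 1, 31, 0, 0)) is transformed
    # to (datetime(2014, 1, 1, 0, 0), datetime(2014, 2, 1, 0, 14, 59)): the
    # end day is first extended to 23:59:59 and then incremented by one
    # 15-minute aggregation period so that the 00:00 reading of the following
    # day is included.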

    def aggregatedData(
        self, dataType="", aggregationType="", timeColumnName="", subkeyColumnName="", startDate="", endDate=""
    ):
        """
        ***********************************************************************
        Provide aggregated data.
        ***********************************************************************

        Start and end dates are used to calculate interval crossings.

        :param dataType: String
        :param aggregationType: String
        :param timeColumnName: String
        :param subkeyColumnName: String
        :param startDate: String
        :param endDate: String
        :returns: MSGAggregatedData
        """

        aggData = []
        ci = lambda col_name: self.columns[dataType].split(",").index(col_name)

        rowCnt = 0

        mySubkeys = []
        if subkeyColumnName:
            mySubkeys = self.subkeys(
                dataType=dataType,
                timestampCol=timeColumnName,
                subkeyCol=subkeyColumnName,
                startDate=startDate,
                endDate=endDate,
            )

        self.logger.log("subkeys: {}".format(mySubkeys), "debug")

        def __initSumAndCount(subkey=None, sums=None, cnts=None):
            """
            Initialize the sum and cnt data structures.
            :param subkey: string
            :param sums: list | dict | None
            :param cnts: list | dict | None
            """

            if not sums and not cnts:
                sums = {}
                cnts = {}

            if not mySubkeys:
                sums = []
                cnts = []
                for i in range(len(self.columns[dataType].split(","))):
                    sums.append(0)
                    cnts.append(0)
            else:
                if not subkey:
                    for i in range(len(self.columns[dataType].split(","))):
                        for k in mySubkeys:
                            if k not in sums.keys():
                                sums[k] = []
                                cnts[k] = []
                            sums[k].append(0)
                            cnts[k].append(0)
                else:
                    sums[subkey] = []
                    for i in range(len(self.columns[dataType].split(","))):
                        sums[subkey].append(0)
                    cnts[subkey] = []
                    for i in range(len(self.columns[dataType].split(","))):
                        cnts[subkey].append(0)

            return (sums, cnts)

        (sum, cnt) = __initSumAndCount()

        def __initIntervalCrossings():
            """
            Perform initialization of the interval crossings used to
            determine when interval crossings occur.
            :returns: None
            """

            subkeysToCheck = copy.copy(mySubkeys)
            self.logger.log("subkeys to check: {}".format(subkeysToCheck), "debug")

            if mySubkeys:
                for row in self.rawData(
                    dataType=dataType,
                    orderBy=[timeColumnName, subkeyColumnName],
                    timestampCol=timeColumnName,
                    startDate=startDate,
                    endDate=endDate,
                ):

                    # @CRITICAL: Exit after every subkey has been visited.
                    # This scans the raw data until each subkey is encountered
                    # ONCE and then exits.
                    if subkeysToCheck != []:
                        if row[ci(subkeyColumnName)] in subkeysToCheck:
                            subkeysToCheck.remove(row[ci(subkeyColumnName)])
                        minute = row[ci(timeColumnName)].timetuple()[MINUTE_POSITION]

                        if minute <= 15:
                            self.nextMinuteCrossing[row[ci(subkeyColumnName)]] = 15
                        elif minute <= 30:
                            self.nextMinuteCrossing[row[ci(subkeyColumnName)]] = 30
                        elif minute <= 45:
                            self.nextMinuteCrossing[row[ci(subkeyColumnName)]] = 45
                        elif minute == 0 or minute <= 59:
                            self.nextMinuteCrossing[row[ci(subkeyColumnName)]] = 0
                        else:
                            raise Exception("Unable to determine next minute crossing")
                        self.logger.log(
                            "next min crossing for {} = {}".format(
                                row[ci(subkeyColumnName)], self.nextMinuteCrossing[row[ci(subkeyColumnName)]]
                            ),
                            "debug",
                        )
                    else:
                        break

            else:
                # Non-subkey case e.g. weather data.
                rowCnt = 0
                # @todo Optimize by querying only the first row.
                for row in self.rawData(
                    dataType=dataType,
                    orderBy=[timeColumnName],
                    timestampCol=timeColumnName,
                    startDate=startDate,
                    endDate=endDate,
                ):
                    minute = row[ci(timeColumnName)].timetuple()[MINUTE_POSITION]
                    if minute <= 15:
                        self.nextMinuteCrossingWithoutSubkeys = 15
                    elif minute <= 30:
                        self.nextMinuteCrossingWithoutSubkeys = 30
                    elif minute <= 45:
                        self.nextMinuteCrossingWithoutSubkeys = 45
                    elif minute == 0 or minute <= 59:
                        self.nextMinuteCrossingWithoutSubkeys = 0
                    else:
                        raise Exception("Unable to determine next minute crossing")
                    self.logger.log("next min crossing = {}".format(self.nextMinuteCrossingWithoutSubkeys), "debug")
                    rowCnt += 1
                    if rowCnt > 0:
                        break

        __initIntervalCrossings()

        for row in self.rawData(
            dataType=dataType,
            orderBy=[timeColumnName, subkeyColumnName],
            timestampCol=timeColumnName,
            startDate=startDate,
            endDate=endDate,
        ):

            if mySubkeys:
                for col in self.columns[dataType].split(","):
                    if self.mathUtil.isNumber(row[ci(col)]) and ci(col) != ci(subkeyColumnName):
                        sum[row[ci(subkeyColumnName)]][ci(col)] += row[ci(col)]
                        cnt[row[ci(subkeyColumnName)]][ci(col)] += 1

                minute = row[ci(timeColumnName)].timetuple()[MINUTE_POSITION]

                if self.intervalCrossed(minute=minute, subkey=row[ci(subkeyColumnName)]):
                    minuteCrossed = minute

                    # Perform aggregation on all of the previous data including
                    # the current data for the current subkey.
                    self.logger.log("key: {}".format(row[ci(subkeyColumnName)]), "debug")
                    aggData += [
                        self.intervalAverages(
                            sum,
                            cnt,
                            row[ci(timeColumnName)],
                            ci(timeColumnName),
                            ci(subkeyColumnName),
                            row[ci(subkeyColumnName)],
                        )
                    ]
                    self.logger.log("minute crossed {}".format(minuteCrossed), "DEBUG")

                    # Init current sum and cnt for subkey that has a completed
                    # interval.
                    (sum, cnt) = __initSumAndCount(subkey=row[ci(subkeyColumnName)], sums=sum, cnts=cnt)
            else:
                for col in self.columns[dataType].split(","):
                    if self.mathUtil.isNumber(row[ci(col)]):
                        sum[ci(col)] += row[ci(col)]
                        cnt[ci(col)] += 1

                minute = row[ci(timeColumnName)].timetuple()[MINUTE_POSITION]

                if self.intervalCrossed(minute=minute):
                    aggData += [self.intervalAverages(sum, cnt, row[ci(timeColumnName)], ci(timeColumnName))]
                    (sum, cnt) = __initSumAndCount(subkey=None, sums=sum, cnts=cnt)

            rowCnt += 1

        self.logger.log("aggdata = {}".format(aggData), "debug")
        return MSGAggregatedData(
            aggregationType=aggregationType, columns=self.columns[dataType].split(","), data=aggData
        )
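
    # Illustrative usage sketch with a hypothetical aggregator instance and
    # arguments; this is not part of the original module:
    #
    #   agg = aggregator.aggregatedData(dataType='irradiance',
    #                                   aggregationType='agg_irradiance',
    #                                   timeColumnName='timestamp',
    #                                   subkeyColumnName='sensor_id',
    #                                   startDate='2014-01-01 00:00:00',
    #                                   endDate='2014-01-31 23:59:59')
    #   aggregator.insertAggregatedData(agg=agg)
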
    # Get the keep list.
    keepList = weatherUtil.getKeepList(retriever.fileList, cursor)
    if keepList:
        msg = "Performing secondary retrieval."
        print msg
        MSG_BODY += "%s\n" % msg

        retriever.pool = multiprocessing.Pool(int(multiprocessingLimit))
        results = retriever.pool.map(performDownloadingWithForcedDownload, keepList)
        retriever.pool.close()
        retriever.pool.join()

        if False in results:
            msg = "An error occurred during secondary retrieval."
            print msg
            MSG_BODY += "%s\n" % msg

    if downloadCount == 0:
        # Retrieve the last dated set if nothing else was retrieved.
        retriever.dateList.sort()
        logger.log("filelist {}".format(retriever.fileList), INFO)
        performDownloading(retriever.fileList[-1], forceDownload=True)

    msg = "downloadCount = %s." % downloadCount
    print msg
    MSG_BODY += "%s\n" % msg

    cleanUpTxtFiles()
    saveRetrievalResults()
Esempio n. 47
0
    result = insertDataWrapper(path)
    pattern = r'Process-(\d+),'
    jobString = str(multiprocessing.current_process())
    match = re.search(pattern, jobString)
    assert match is not None, "Process ID was not matched."
    returnDict[match.group(1)] = result


if __name__ == '__main__':

    processCommandLineArguments()

    inserter = Inserter()

    if COMMAND_LINE_ARGS.testing:
        logger.log("Testing mode is ON.\n", 'info')
    if COMMAND_LINE_ARGS.email:
        logger.log("Email will be sent.\n", 'info')

    msg = ''  # Used for the notification message.
    msgBody = ''  # Used for the notification message.
    databaseName = ''

    if COMMAND_LINE_ARGS.testing:
        databaseName = configer.configOptionValue("Database",
                                                  "testing_db_name")
    else:
        databaseName = configer.configOptionValue("Database", "db_name")

    msg = "Recursively inserting data to the database named %s." % databaseName
    print msg
Esempio n. 48
0
class SEKDBConnector(object):
    """
    Make and manage a connection to a PostgreSQL database.

    Usage:

        conn = SEKDBConnector().connectDB()
        cursor = conn.cursor()

    """
    def __init__(self,
                 dbName='',
                 dbHost='',
                 dbPort='',
                 dbUsername='',
                 dbPassword='',
                 testing=False,
                 logLevel='silent'):
        """
        Constructor.

        :param testing: Boolean indicating if Testing Mode is on. When
        testing mode is on, a connection to the testing database will be made
        instead of the production database. This is useful for unit testing.
        :param logLevel: Logging level string.
        """

        self.logger = SEKLogger(__name__, logLevel)

        if testing:
            self.logger.log("Testing Mode is ON.")

        self.dbName = dbName
        self.dbHost = dbHost
        self.dbPort = dbPort
        self.dbPassword = dbPassword
        self.dbUsername = dbUsername

        self.logger.log(
            "Instantiating DB connector with database {}.".format(dbName))

        self.conn = self.connectDB()
        if not self.conn:
            self.logger.log('DB connection not available.', 'error')
            sys.exit(-1)

        try:
            self.dictCur = self.conn.cursor(
                cursor_factory=psycopg2.extras.DictCursor)
        except AttributeError as error:
            self.logger.log('Error while getting DictCursor: {}'.format(error))

    def connectDB(self):
        """
        Make the DB connection.
        :returns: DB connection object if successful, otherwise None.
        """

        # @todo Make this method private since the init makes the connection.

        conn = None

        try:
            conn = psycopg2.connect(
                "dbname='{0}' user='******' host='{2}' port='{3}' password='******'".format(self.dbName, self.dbUsername, self.dbHost,
                             self.dbPort, self.dbPassword))
        except Exception as detail:
            self.logger.log(
                "Failed to connect to the database {}: {}.".format(
                    self.dbName, detail), 'error')
            sys.exit(-1)

        self.logger.log("Opened DB connection to database {}.".format(
            self.dbName))
        return conn

    def closeDB(self, conn):
        """
        Close a database connection.
        """

        self.logger.log("Closing database {}.".format(self.dbName))
        conn.close()

    def __del__(self):
        """
        Destructor.

        Close the database connection.
        """

        self.logger.log("Closing the DB connection to database {}.".format(
            self.dbName))
        self.conn.close()
class Inserter(object):
    """
    Perform insertion of data contained in a single file to the MECO database
    specified in the configuration file.
    """
    def __init__(self, testing=False):
        """
        Constructor.

        :param testing: Flag indicating if testing mode is on.
        """

        self.logger = SEKLogger(__name__)
        self.parser = MECOXMLParser(testing)
        self.configer = MSGConfiger()

    def insertData(self, filePath, testing=False, jobID=''):
        """
        Insert data from a single file to the database.

        :param filePath: Full path of a data file.
        :param testing: Boolean flag indicating if the testing database
        should be used.
        :param jobID: An ID used to distinguish multiprocessing jobs.
        :returns: String containing concise log of activity.
        """

        parseMsg = ''
        parseLog = ''

        print "Processing file %s." % filePath
        i = Inserter(testing)
        if i.configer.configOptionValue("Debugging", "debug"):
            print "Debugging is on"

        if testing:
            parseMsg = "\nInserting data to database %s.\n" % i.configer\
                .configOptionValue(
                "Database", "testing_db_name")
            sys.stderr.write(parseMsg)
            parseLog += parseMsg
        else:
            parseMsg += "\nInserting data to database %s.\n" % i.configer\
                .configOptionValue(
                "Database", "db_name")
            sys.stderr.write(parseMsg)
            parseLog += parseMsg

        fileObject = None

        # Open the file and process it.
        if re.search('.*\.xml$', filePath):
            fileObject = open(filePath, "rb")
        elif re.search('.*\.xml\.gz$', filePath):
            fileObject = gzip.open(filePath, "rb")
        else:
            print "Error: %s is not an XML file." % filePath

        try:
            with FileLock(filePath, timeout=2) as lock:
                self.logger.log("Locking %s " % filePath)
                i.parser.filename = filePath

                # Obtain the log of the parsing.
                parseLog += i.parser.parseXML(fileObject, True, jobID=jobID)

                fileObject.close()
        except TypeError:
            self.logger.log('Type error occurred', 'error')

        return parseLog
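
# Standalone sketch of the file-type dispatch used by insertData() above; the
# paths below are hypothetical and serve only as an illustration.
import re

for path in ('meter_data.xml', 'meter_data.xml.gz', 'notes.txt'):
    if re.search(r'.*\.xml$', path):
        print "%s: opened with open(path, 'rb')" % path
    elif re.search(r'.*\.xml\.gz$', path):
        print "%s: opened with gzip.open(path, 'rb')" % path
    else:
        print "%s: not an XML file" % path
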
Esempio n. 50
0
class MECODupeChecker(object):
    """
    Check for duplicate data in the database.
    """
    def __init__(self):
        """
        Constructor.
        """

        self.logger = SEKLogger(__name__, 'debug')
        self.mecoConfig = MSGConfiger()
        self.currentReadingID = 0
        self.dbUtil = MSGDBUtil()

    def getLastElement(self, rows):
        """
        Get the last element in a collection.

        Example:
            rows = (element1, element2, element3)
            getLastElement(rows) # return element3

        :param rows: Result rows from a query.
        :return: Last element in the collection.
        """

        for i, var in enumerate(rows):
            if i == len(rows) - 1:
                return var
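        # For sequences such as the tuples returned by fetchall(), this is
        # equivalent to returning rows[-1].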

    def eventBranchDupeExists(self, conn, meterName, eventTime):
        """

        :param conn: Database connection.
        :param meterName: Meter name in MeterData table.
        :param eventTime: Timestamp of event.
        :return: True if tuple exists, False if not.
        """

        dbCursor = conn.cursor()

        sql = """SELECT "Event".event_time,
                        "MeterData".meter_data_id,
                        "EventData".event_data_id
                 FROM ( ( "MeterData" JOIN "EventData" ON (
                        ( "MeterData".meter_data_id = "EventData"
                        .meter_data_id ) ) )
                 JOIN "Event" ON ( ( "EventData".event_data_id = "Event"
                 .event_data_id ) ) )
                 WHERE "MeterData".meter_name = '%s'
                 AND "Event".event_time = '%s' """ % (meterName, eventTime)

        self.dbUtil.executeSQL(dbCursor, sql)
        rows = dbCursor.fetchall()

        if len(rows) > 0:
            return True
        else:
            return False

    def registerBranchDupeExists(self,
                                 conn,
                                 meterName,
                                 readTime,
                                 registerNumber,
                                 DEBUG=False):
        """
        Determine if a register branch duplicate exists for a given meter
        name, read time, number tuple.

        :param conn: Database connection.
        :param meterName: Meter name in MeterData table.
        :param readTime: Read time in RegisterRead table.
        :param registerNumber: Corresponds to DB column "number".
        :return: True if tuple exists, False if not.
        """

        dbCursor = conn.cursor()

        sql = """SELECT "public"."MeterData".meter_name,
                        "public"."RegisterRead".read_time,
                        "public"."Register"."number"
                 FROM "public"."MeterData"
                 INNER JOIN "public"."RegisterData" ON
                      "public" ."MeterData".meter_data_id = "public"
                      ."RegisterData".meter_data_id
                 INNER JOIN "public"."RegisterRead" ON
                      "public"."RegisterData" .register_data_id = "public"
                      ."RegisterRead".register_data_id
                 INNER JOIN "public"."Tier" ON "public"."RegisterRead"
                 .register_read_id = "public"."Tier" .register_read_id
                 INNER JOIN "public"."Register" ON "public"."Tier".tier_id =
                 "public"."Register".tier_id
                 WHERE "public"."MeterData".meter_name = '%s'
                 AND "public"."RegisterRead".read_time = '%s'
                 AND "public"."Register".number = '%s'
                 """ % (meterName, readTime, registerNumber)

        self.dbUtil.executeSQL(dbCursor, sql)
        rows = dbCursor.fetchall()

        if len(rows) > 0:
            return True
        else:
            return False

    def readingBranchDupeExists(self,
                                conn,
                                meterName,
                                endTime,
                                channel=None,
                                DEBUG=False):
        """
        Duplicate cases:
        1. Tuple (meterID, endTime) exists in the database.
        @DEPRECATED in favor of (2), full meterName-endTime-channel query.

        2. Tuple (meterID, endTime, channel) exists in the database.

        :param conn: Database connection.
        :param meterName: Meter name in MeterData table.
        :param endTime: End time in Interval table.
        :param channel: Required parameter that was previously optional. An
        optional channel is now deprecated.
        :return: True if tuple exists, False if not.
        """

        dbCursor = conn.cursor()

        if DEBUG:
            print "readingBranchDupeExists():"

        if channel is not None:
            sql = """SELECT	"Interval".end_time,
                            "MeterData".meter_name,
                            "MeterData".meter_data_id,
                            "Reading".channel,
                            "Reading".reading_id
                     FROM "MeterData"
                     INNER JOIN "IntervalReadData" ON "MeterData"
                     .meter_data_id =
                      "IntervalReadData".meter_data_id
                     INNER JOIN "Interval" ON "IntervalReadData"
                     .interval_read_data_id = "Interval".interval_read_data_id
                     INNER JOIN "Reading" ON "Interval".interval_id = "Reading"
                     .interval_id
                     WHERE "Interval".end_time = '%s' and meter_name = '%s' and
                     channel = '%s'""" % (endTime, meterName, channel)

        else:  # deprecated query
            sql = """SELECT	"Interval".end_time,
                            "MeterData".meter_name,
                            "MeterData".meter_data_id
                     FROM "MeterData"
                     INNER JOIN "IntervalReadData" ON "MeterData"
                     .meter_data_id =
                      "IntervalReadData".meter_data_id
                     INNER JOIN "Interval" ON "IntervalReadData"
                     .interval_read_data_id = "Interval".interval_read_data_id
                     WHERE "Interval".end_time = '%s' and meter_name =
                     '%s'""" % (endTime, meterName)

        self.dbUtil.executeSQL(dbCursor, sql)
        rows = dbCursor.fetchall()

        if len(rows) > 0:
            assert len(
                rows) < 2, "Dupes should be less than 2, found %s: %s." % (
                    len(rows), rows)

            self.currentReadingID = self.getLastElement(rows[0])
            self.logger.log('Reading ID = %s.' % self.currentReadingID,
                            'silent')

            self.logger.log(
                "Duplicate found for meter %s, end time %s, channel %s." %
                (meterName, endTime, channel), 'silent')
            return True

        else:
            self.logger.log(
                "Found no rows for meter %s, end time %s, channel %s." %
                (meterName, endTime, channel), 'silent')
            return False

    def readingValuesAreInTheDatabase(self, conn, readingDataDict):
        """
        Given a reading ID, verify that the values associated are present
        in the database.

        Values are from the columns:
            1. channel
            2. raw_value
            3. uom
            4. value

        :param readingDataDict: Dictionary containing reading values.
        :return: True if the existing values are the same, otherwise False.
        """

        dbCursor = conn.cursor()

        sql = """SELECT "Reading".reading_id,
                                "Reading".channel,
                                "Reading".raw_value,
                                "Reading".uom,
                                "Reading"."value"
                         FROM "Reading"
                         WHERE "Reading".reading_id = %s""" % (
            self.currentReadingID)

        self.dbUtil.executeSQL(dbCursor, sql)
        rows = dbCursor.fetchall()

        if self.currentReadingID == 0:
            return False

        # assert len(rows) == 1 or len(rows) == 0
        assert len(
            rows) == 1, "Didn't find a matching reading for reading ID %s." %\
                        self.currentReadingID
        if len(rows) == 1:
            self.logger.log("Found %s existing matches." % len(rows), 'silent')

            allEqual = True
            if int(readingDataDict['Channel']) == int(rows[0][1]):
                print "channel equal,"
            else:
                self.logger.log(
                    "channel not equal: %s,%s,%s" %
                    (int(readingDataDict['Channel']), int(
                        rows[0][1]), readingDataDict['Channel'] == rows[0][1]),
                    'debug')
                allEqual = False

            if int(readingDataDict['RawValue']) == int(rows[0][2]):
                print "raw value equal,"
            else:
                self.logger.log(
                    "rawvalue not equal: %s,%s,%s" %
                    (int(readingDataDict['RawValue']), int(rows[0][2]),
                     readingDataDict['RawValue'] == rows[0][2]), 'debug')
                allEqual = False

            if readingDataDict['UOM'] == rows[0][3]:
                print "uom equal,"
            else:
                self.logger.log(
                    "uom not equal: %s,%s,%s" %
                    (readingDataDict['UOM'], rows[0][3], readingDataDict['UOM']
                     == rows[0][3]), 'debug')
                allEqual = False

            if self.approximatelyEqual(float(readingDataDict['Value']),
                                       float(rows[0][4]), 0.001):
                self.logger.log("value equal", 'silent')
            else:
                self.logger.log(
                    "value not equal: %s,%s,%s" %
                    (float(readingDataDict['Value']), float(
                        rows[0][4]), readingDataDict['Value'] == rows[0][4]),
                    'debug')
                allEqual = False

            if allEqual:
                return True
            else:
                return False
        else:
            return False

    def approximatelyEqual(self, a, b, tolerance):
        return abs(a - b) < tolerance
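
# A minimal sketch, not the module's own approach, of how the duplicate checks
# above could use psycopg2 parameter binding instead of manual string
# interpolation; dbCursor is assumed to come from an open psycopg2 connection.
def eventBranchDupeExistsParameterized(dbCursor, meterName, eventTime):
    """
    Return True if a (meter_name, event_time) tuple already exists.
    """
    sql = """SELECT 1
             FROM "MeterData"
             JOIN "EventData" ON "MeterData".meter_data_id =
                  "EventData".meter_data_id
             JOIN "Event" ON "EventData".event_data_id = "Event".event_data_id
             WHERE "MeterData".meter_name = %s
             AND "Event".event_time = %s"""
    # The driver performs the quoting and escaping of the parameters.
    dbCursor.execute(sql, (meterName, eventTime))
    return dbCursor.fetchone() is not None
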
Esempio n. 51
0
class MSGEgaugeNewDataChecker(object):
    """
    Provide notification of newly loaded MSG eGauge data.

    This uses notification type MSG_EGAUGE_SERVICE.
    """
    def __init__(self):
        """
        Constructor.
        """

        print __name__
        self.logger = SEKLogger(__name__)
        self.connector = MSGDBConnector()
        self.dbUtil = MSGDBUtil()
        self.notifier = MSGNotifier()
        self.configer = MSGConfiger()

    def newDataCount(self):
        """
        Measure the amount of new data that is present since the last time
        new data was reported.
        """

        cursor = self.connector.conn.cursor()
        tableName = 'EgaugeEnergyAutoload'
        lastTime = self.lastReportDate('MSG_EGAUGE_SERVICE')
        if lastTime is None:
            lastTime = '1900-01-01'
        sql = """SELECT COUNT(*) FROM "%s" WHERE datetime > '%s'""" % (
            tableName, lastTime)

        success = self.dbUtil.executeSQL(cursor, sql)
        if success:
            rows = cursor.fetchall()

            if not rows[0][0]:
                return 0
            else:
                return rows[0][0]
        else:
            # @todo Raise an exception.
            return None

    def lastReportDate(self, notificationType):
        """
        Get the last time a notification was reported.

        :param notificationType: A string indicating the type of the
        notification. It is stored in the event history.
        :returns: datetime of last report date.
        """

        cursor = self.connector.conn.cursor()
        sql = """SELECT MAX("notificationTime") FROM "%s" WHERE
        "notificationType" = '%s'""" % (NOTIFICATION_HISTORY_TABLE,
                                        notificationType)

        success = self.dbUtil.executeSQL(cursor, sql)
        if success:
            rows = cursor.fetchall()

            if not rows[0][0]:
                return None
            else:
                return rows[0][0]
        else:
            # @todo Raise an exception.
            return None

    def saveNotificationTime(self):
        """
        Save the notification event to the notification history.
        """

        cursor = self.connector.conn.cursor()
        sql = """INSERT INTO "%s" ("notificationType", "notificationTime")
        VALUES ('MSG_EGAUGE_SERVICE', NOW())""" % NOTIFICATION_HISTORY_TABLE
        success = self.dbUtil.executeSQL(cursor, sql)
        self.connector.conn.commit()
        if not success:
            # @todo Raise an exception.
            self.logger.log(
                'An error occurred while saving the notification time.')

    def sendNewDataNotification(self, testing=False):
        """
        Send a notification reporting on the new data available since the
        last time new data was reported.

        :param testing: Use testing mode when True.
        """

        lastReportDate = self.lastReportDate('MSG_EGAUGE_SERVICE')

        if not lastReportDate:
            lastReportDate = "never"

        msgBody = '\nNew MSG eGauge data has been loaded to %s.' % self\
            .connector.dbName
        msgBody += '\n\n'
        msgBody += 'The new data count is %s readings.' % self.newDataCount()
        msgBody += '\n\n'
        msgBody += 'The last report date was %s.' % lastReportDate
        msgBody += '\n\n'
        self.notifier.sendNotificationEmail(msgBody, testing=testing)
        self.saveNotificationTime()
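
# Illustrative usage sketch; it assumes the MSG database and notification
# configuration are available and is not part of the original module:
#
#   checker = MSGEgaugeNewDataChecker()
#   if checker.newDataCount() > 0:
#       checker.sendNewDataNotification(testing=True)
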
class SEKDBConnector(object):
    """
    Make and manage a connection to a PostgreSQL database.

    Usage:

        conn = SEKDBConnector().connectDB()
        cursor = conn.cursor()

    """


    def __init__(self, dbName = '', dbHost = '', dbPort = '', dbUsername = '',
                 dbPassword = '', testing = False, logLevel = 'silent'):
        """
        Constructor.

        :param testing: Boolean indicating if Testing Mode is on. When
        testing mode is on, a connection to the testing database will be made
        instead of the production database. This is useful for unit testing.
        :param logLevel: Logging level string.
        """

        self.logger = SEKLogger(__name__, logLevel)

        if testing:
            self.logger.log("Testing Mode is ON.")

        self.dbName = dbName
        self.dbHost = dbHost
        self.dbPort = dbPort
        self.dbPassword = dbPassword
        self.dbUsername = dbUsername

        self.logger.log(
            "Instantiating DB connector with database {}.".format(dbName))

        self.conn = self.connectDB()
        if not self.conn:
            self.logger.log('DB connection not available.', 'error')
            sys.exit(-1)

        try:
            self.dictCur = self.conn.cursor(
                cursor_factory = psycopg2.extras.DictCursor)
        except AttributeError as error:
            self.logger.log('Error while getting DictCursor: {}'.format(error))


    def connectDB(self):
        """
        Make the DB connection.
        :returns: DB connection object if successful, otherwise None.
        """

        # @todo Make this method private since the init makes the connection.

        conn = None

        try:
            conn = psycopg2.connect(
                "dbname='{0}' user='******' host='{2}' port='{3}' password='******'".format(self.dbName, self.dbUsername, self.dbHost,
                             self.dbPort, self.dbPassword))
        except Exception as detail:
            self.logger.log(
                "Failed to connect to the database {}: {}.".format(self.dbName,
                                                                   detail),
                'error')
            sys.exit(-1)

        self.logger.log(
            "Opened DB connection to database {}.".format(self.dbName))
        return conn


    def closeDB(self, conn):
        """
        Close a database connection.
        """

        self.logger.log("Closing database {}.".format(self.dbName))
        conn.close()


    def __del__(self):
        """
        Destructor.

        Close the database connection.
        """

        self.logger.log(
            "Closing the DB connection to database {}.".format(self.dbName))
        self.conn.close()
class MSGNOAAWeatherDataInserter(object):
    """
    Performs weather data insertion to a database.
    """

    def __init__(self, testing = False):
        """
        Constructor.
        :param testing: True if testing mode is being used.
        """

        self.logger = SEKLogger(__name__, 'info')
        self.dbUtil = MSGDBUtil()
        self.dupeChecker = MSGWeatherDataDupeChecker()

    def insertDataDict(self, conn, tableName, listOfDataDicts, commit = False):
        """
        Given a table name and a list of dictionaries of column names and
        values, insert them to the DB.

        :param conn: A database connection.
        :param tableName: Name of the DB table to be inserted to.
        :param listOfDataDicts: List of dictionaries mapping column names to
        values to be inserted to the DB.
        :param commit: Optional flag indicating that DB transactions will
        be committed.
        :returns: Set of datetimes processed.
        """

        cur = conn.cursor()
        processedDateTimes = set()

        for row in listOfDataDicts:

            # Add a creation timestamp using the SQL function.
            row['created'] = 'NOW()'

            cols = []
            vals = []

            for col in row.keys():
                # Prepare the columns and values for insertion via SQL.

                cols.append(col)
                if (row[col] != 'NULL'):
                    # Surround each value with single quotes...
                    vals.append("'%s'" % row[col])
                else:
                    # Except for NULL values.
                    vals.append("%s" % row[col])

            sql = """INSERT INTO "%s" (%s) VALUES (%s)""" % (
                tableName, ','.join(cols), ','.join(vals))

            if self.dupeChecker.duplicateExists(cur, row['wban'],
                                                row['datetime'],
                                                row['record_type']):
                self.logger.log("Dupe found, dropping dupe.", 'info')
            else:
                processedDateTimes.add(
                    dt.datetime.strptime(row['datetime'], "%Y-%m-%d %H:%M"))
                if self.dbUtil.executeSQL(cur, sql,
                                          exitOnFail = False) is False:
                    # An error occurred.
                    for col in sorted(row.keys()):
                        print "%s: %s" % (col, row[col])
                    sys.exit(-1)

        if commit:
            try:
                conn.commit()
            except Exception as error:
                self.logger.log("Commit failed: {}".format(error), 'error')

        return processedDateTimes
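
# Standalone illustration of the INSERT statement that insertDataDict()
# assembles for a single row; the table name and row contents here are
# hypothetical.
row = {'wban': '22521',
       'datetime': '2014-01-01 00:53',
       'record_type': 'AA',
       'dry_bulb_celsius': '21.1',
       'wind_speed': 'NULL',
       'created': 'NOW()'}

cols = []
vals = []
for col in row.keys():
    cols.append(col)
    if row[col] != 'NULL':
        # Non-NULL values are surrounded with single quotes...
        vals.append("'%s'" % row[col])
    else:
        # ...while NULL is passed through unquoted.
        vals.append("%s" % row[col])

sql = """INSERT INTO "%s" (%s) VALUES (%s)""" % (
    'WeatherNOAA', ','.join(cols), ','.join(vals))
print sql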