Example #1
0
    def Start(self, logger, moduleName, filelocs):
        '''
        Main routine.

        Pulls documents from Phoenix, writes them to a local CSV file,
        stages the CSV in a temporary S3 location, creates the Postgres
        document table, bulk-loads the CSV via the database server's temp
        directory, and finally removes the temporary S3 object.

        :param logger: logger instance used for progress/error reporting
        :param moduleName: name of this module, used as a log prefix
        :param filelocs: file-location configuration passed through to
                         ApplicationBase.Start
        :raises: re-raises any exception after logging it
        '''
        try:
            ApplicationBase.Start(self, logger, moduleName, filelocs)
            self.logger.info(self.moduleName + " - Processing: ")
            outputCSVfileName = self.localTempDirectory + '/PheonixDocuments.csv'

            self.logger.info(self.moduleName + " - Pull documents from Phoenix: ")
            jsonDocuments = self.PullDataFromPhoenix()
            self.logger.info(self.moduleName + " - save contents to CSV file from Phoenix: ")
            self.ExportToCSV(outputCSVfileName, jsonDocuments)
            self.logger.info(self.moduleName + " - push documents csv file to S3: ")
            bucketName, s3TempKey = S3Utilities.UploadFileToS3Temp(self.awsParams.s3, outputCSVfileName)

            self.logger.info(self.moduleName + " - Create document table: ")
            psConnect = self.GetPSConnection()
            self.CreatePostgresTables(psConnect)

            self.logger.info(self.moduleName + " - pull document s3 to database server temp: ")
            postgresTempFile = self.DownloadFromS3ToPSTempDir(psConnect, bucketName, s3TempKey)
            self.logger.info(self.moduleName + " - load documents csv file: ")
            self.LoadDataFromPostgresTempDir(psConnect, postgresTempFile)
            self.logger.info(self.moduleName + " - clean up temp file: ")
            S3Utilities.DeleteFile(self.awsParams.s3, bucketName, s3TempKey)
        except Exception:
            # Was a bare "except:", which also traps SystemExit and
            # KeyboardInterrupt; narrow to Exception, log, and re-raise.
            logger.exception(moduleName + " - Exception in start!")
            raise
Example #2
0
    def LoadFileIntoRedshift(rsConnect, s3, logger, fileUtilities, localFilepath, destinationSchema,
                             redshiftDestTable, fileFormat, dateFormat, delimiter, isManifest='N'):
        '''
        Load file from local drive to RedShift
        Zip the file, upload to S3 and then load into RedShift

        Manifest files are uploaded as-is; anything else is gzip-compressed
        first. The temporary S3 object is deleted after the load.
        '''
        if isManifest == 'Y':
            uploadPath = localFilepath
        else:
            # Compress before upload
            uploadPath = localFilepath + ".gz"
            fileUtilities.GzipFile(localFilepath, uploadPath)

        bucketName, s3TempKey = S3Utilities.UploadFileToS3Temp(s3, uploadPath)

        # Job definition consumed by RedshiftUtilities.LoadDataFromS3
        job = {
            "destinationSchema": destinationSchema,
            "tableName": redshiftDestTable,
            "s3Filename": S3Utilities.GetS3FileName(bucketName, s3TempKey),
            "fileFormat": fileFormat,
            "dateFormat": dateFormat,
            "delimiter": delimiter,
        }

        RedshiftUtilities.LoadDataFromS3(rsConnect, s3, job, logger, isManifest)

        S3Utilities.DeleteFile(s3, bucketName, s3TempKey)
Example #3
0
    def Start(self, logger, moduleName, filelocs):
        '''
        Main routine.

        Downloads an Excel workbook from S3, extracts the "Major Variables"
        sheet to CSV, stages the CSV in temporary S3 storage, creates the
        Postgres tables, loads the data via the database server's temp
        directory, then loads the derived base attributes/data sets.

        :param logger: logger instance used for progress/error reporting
        :param moduleName: name of this module, used as a log prefix
        :param filelocs: file-location configuration passed through to
                         ApplicationBase.Start
        :raises: re-raises any exception after logging it
        '''
        try:
            ApplicationBase.Start(self, logger, moduleName, filelocs)

            s3Key = self.job["s3SrcDirectory"] + "/" + self.job["fileToLoad"]
            self.logger.info(self.moduleName + " - Processing file: " + s3Key)

            localFilepath = self.localTempDirectory + "/" + ntpath.basename(
                s3Key)
            S3Utilities.DownloadFileFromS3(self.awsParams.s3,
                                           self.job["bucketName"], s3Key,
                                           localFilepath)

            # NOTE(review): parse_cols is deprecated in newer pandas
            # (replaced by usecols); left unchanged to match the pandas
            # version this project pins — confirm before upgrading.
            df = pd.read_excel(localFilepath,
                               "Major Variables",
                               index_col=None,
                               na_values=['NaN'],
                               skiprows=1,
                               parse_cols="C:E,G:I",
                               header=None)

            #  Save the data as CSV
            outputCSVfileName = self.localTempDirectory + '/SampleData.csv'
            df.to_csv(outputCSVfileName,
                      sep=str(self.job["delimiter"]),
                      encoding='utf-8',
                      index=False)

            # Update the CSV file into a temporary S3 location.  Postgres will download it from there to its local directory
            bucketName, s3TempKey = S3Utilities.UploadFileToS3Temp(
                self.awsParams.s3, outputCSVfileName)

            psConnect = self.GetPSConnection()
            # Postgres tables are created using a connection (rather than psql)
            self.CreatePostgresTables(psConnect)

            postgresTempFile = self.DownloadFromS3ToPSTempDir(
                psConnect, bucketName, s3TempKey)
            self.LoadDataFromPostgresTempDir(psConnect, postgresTempFile)

            S3Utilities.DeleteFile(self.awsParams.s3, bucketName, s3TempKey)

            self.LoadBaseAttributes(psConnect)
            self.LoadBaseData(psConnect, '1000', 'glm_value')
            self.LoadBaseData(psConnect, '2000', 'arima_value')
            self.LoadBaseData(psConnect, '3000', 'lasso_value')
            # The nn_value/spectre_value loads below are intentionally
            # disabled in the original source:
            #           self.LoadBaseData(psConnect,'4000', 'nn_value')
            #            self.LoadBaseData(psConnect,'5000', 'spectre_value')

            psConnect.close()
            self.logger.debug(" SampleData CSV loaded to RedShift")

        except Exception:
            # Was a bare "except:", which also traps SystemExit and
            # KeyboardInterrupt; narrow to Exception, log, and re-raise.
            logger.exception(moduleName + " - Exception in start!")
            raise
Example #4
0
    def testDownloadFileFromS3(self):
        '''Round-trip a file through S3 to verify DownloadFileFromS3.'''
        sourceFile = self.createTestingFile(
            "testDownloadFileFromS3.txt",
            "Testing DownloadFileFromS3 from S3Utilities...")
        downloadTarget = sourceFile.replace(".txt", "_returned.txt")

        bucket, tempKey = S3Utilities.UploadFileToS3Temp(self.awsParams.s3,
                                                         sourceFile)
        S3Utilities.DownloadFileFromS3(self.awsParams.s3, bucket, tempKey,
                                       downloadTarget)
        self.assertTrue(os.path.isfile(downloadTarget),
                        "File could not be downloaded from the cloud.")
Example #5
0
    def testUploadFileToS3Temp(self):
        '''Upload a file with UploadFileToS3Temp and confirm it can be fetched back.'''
        sourceFile = self.createTestingFile(
            "testUploadFileToS3Temp.txt",
            "Testing UploadFileToS3Temp from S3Utilities...")
        downloadTarget = sourceFile.replace(".txt", "_returned.txt")

        bucket, tempKey = S3Utilities.UploadFileToS3Temp(self.awsParams.s3,
                                                         sourceFile)

        S3Utilities.DownloadFileFromS3(self.awsParams.s3, bucket, tempKey,
                                       downloadTarget)
        self.assertTrue(os.path.isfile(downloadTarget),
                        "File was not found or uploaded at the cloud bucket.")
Example #6
0
    def testGetS3FileName(self):
        '''Verify GetS3FileName embeds both the bucket name and the temp key.'''
        baseName = "testGetS3FileName.txt"
        localFile = self.createTestingFile(
            baseName, "Testing GetS3FileName from S3Utilities...")
        bucket, tempKey = S3Utilities.UploadFileToS3Temp(self.awsParams.s3,
                                                         localFile)
        # Split the composed S3 path so each component can be checked
        pathParts = S3Utilities.GetS3FileName(bucket, tempKey).split("/")

        self.assertIn(bucket, pathParts,
                      "s3 File Name does not contain the bucketName.")
        self.assertIn(baseName, pathParts,
                      "s3 File Name does not contain a valid s3TempKey.")
Example #7
0
    def testDeleteFile(self):
        '''
        Verify DeleteFile removes the object: after deletion, a download
        attempt should either fail with a 404 or produce no local file.
        '''
        testFile = self.createTestingFile(
            "testDeleteFile.txt", "Testing DeleteFile from S3Utilities...")
        testFileReturned = testFile.replace(".txt", "_returned.txt")

        bucketName, s3TempKey = S3Utilities.UploadFileToS3Temp(
            self.awsParams.s3, testFile)
        S3Utilities.DeleteFile(self.awsParams.s3, bucketName, s3TempKey)

        try:
            S3Utilities.DownloadFileFromS3(self.awsParams.s3, bucketName,
                                           s3TempKey, testFileReturned)
            self.assertFalse(os.path.isfile(testFileReturned),
                             "File was not deleted from the cloud.")
        except Exception as err:
            # A 404 means the object is gone — the expected outcome.
            # Not every exception carries .status, and Python 3 removed
            # Exception.message, so read the attributes defensively instead
            # of crashing with AttributeError inside the handler.
            if getattr(err, 'status', None) != 404:
                self.fail(
                    "Error registered while trying to delete a file from the cloud. Error:"
                    + str(err))