Example #1
0
    def LoadFileIntoRedshift(rsConnect, s3, logger, fileUtilities, localFilepath, destinationSchema,\
                             redshiftDestTable, fileFormat, dateFormat, delimiter, isManifest='N'):
        '''
        Load a file from the local drive into Redshift.

        Regular files are gzipped first; manifest files (isManifest == 'Y')
        are uploaded as-is. The (compressed) file is staged in a temporary
        S3 location, copied into the destination table, and the temporary
        S3 object is removed afterwards.
        '''
        # Manifests are already in their final form; only plain files get gzipped.
        uploadPath = localFilepath if isManifest == 'Y' else localFilepath + ".gz"
        if isManifest != 'Y':
            fileUtilities.GzipFile(localFilepath, uploadPath)

        bucketName, s3TempKey = S3Utilities.UploadFileToS3Temp(s3, uploadPath)

        # Assemble the load-job definition consumed by LoadDataFromS3
        job = {
            "destinationSchema": destinationSchema,
            "tableName": redshiftDestTable,
            "s3Filename": S3Utilities.GetS3FileName(bucketName, s3TempKey),
            "fileFormat": fileFormat,
            "dateFormat": dateFormat,
            "delimiter": delimiter,
        }

        RedshiftUtilities.LoadDataFromS3(rsConnect, s3, job, logger, isManifest)

        # Remove the temporary staging object from S3
        S3Utilities.DeleteFile(s3, bucketName, s3TempKey)
Example #2
0
    def testGetS3FileName(self):
        '''GetS3FileName should produce a path containing both the bucket name and the temp key.'''
        uploadedName = "testGetS3FileName.txt"
        localFile = self.createTestingFile(
            uploadedName, "Testing GetS3FileName from S3Utilities...")
        bucket, tempKey = S3Utilities.UploadFileToS3Temp(
            self.awsParams.s3, localFile)
        fullName = S3Utilities.GetS3FileName(bucket, tempKey)
        # Split into path components so membership checks match whole segments
        pathParts = fullName.split("/")

        self.assertIn(bucket, pathParts,
                      "s3 File Name does not contain the bucketName.")
        self.assertIn(uploadedName, pathParts,
                      "s3 File Name does not contain a valid s3TempKey.")
Example #3
0
    def ProcessLiquidBalanceFile(self):
        '''
        Extract each configured worksheet, then load its gzipped output
        from S3 into the corresponding Redshift temp table.

        Iterates over self.job["sheetsToExtract"]; for each sheet it calls
        ExtractSheet, builds a load-job definition, runs the S3 -> Redshift
        copy, and deletes the staged S3 object. Any error is logged with a
        full traceback and re-raised.
        '''
        try:
            # NOTE(review): rsConnect does not appear to be closed on any
            # path — confirm whether RedshiftUtilities exposes a close/cleanup
            # call that should run in a finally block.
            rsConnect = RedshiftUtilities.Connect(
                dbname=self.awsParams.redshift['Database'],
                host=self.awsParams.redshift['Hostname'],
                port=self.awsParams.redshift['Port'],
                user=self.awsParams.redshiftCredential['Username'],
                password=self.awsParams.redshiftCredential['Password'])

            for sheetConfig in self.job["sheetsToExtract"]:
                self.ExtractSheet(sheetConfig)

                # ExtractSheet is expected to have staged the gzipped sheet
                # output at this S3 key.
                s3key = self.job["s3SrcDirectory"] + "/" + sheetConfig[
                    "outputName"] + "." + self.job["sheetsOutputFormat"] + ".gz"
                self.logger.info(
                    self.moduleName +
                    " Uploading information to redshift for worksheet: " +
                    sheetConfig["name"])

                # Load-job definition consumed by LoadDataFromS3
                job = {}
                job["destinationSchema"] = self.job["destinationSchema"]
                job["tableName"] = sheetConfig["tempTableName"]
                job["s3Filename"] = S3Utilities.GetS3FileName(
                    self.job["bucketName"], s3key)
                job["fileFormat"] = self.job["fileFormat"]
                job["dateFormat"] = self.job["dateFormat"]
                job["delimiter"] = sheetConfig["delimiter"]

                RedshiftUtilities.LoadDataFromS3(rsConnect, self.awsParams.s3,
                                                 job, self.logger)
                # Remove the staged object once it has been loaded
                S3Utilities.DeleteFile(self.awsParams.s3,
                                       self.job["bucketName"], s3key)
        except Exception:  # narrowed from bare except: let SystemExit/KeyboardInterrupt propagate unlogged
            self.logger.exception(
                self.moduleName +
                " [ProcessLiquidBalanceFile] - We had an error in LiquidsBalance during processBlock"
            )
            raise