def UploadPackedToS3(self):
    '''
    Uploads all files packed to s3.
    '''
    self.logger.info("Uploading GZIP files to s3 folder...")
    S3Utilities.CopyItemsAWSCli(self.packedFolder,
                                "s3://" + self.job["bucketName"] + self.job["s3ToDirectory"],
                                "--recursive --quiet")
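# S3Utilities.CopyItemsAWSCli is a project helper, not an AWS SDK call.  Below is a
# minimal sketch of what it presumably does -- shelling out to "aws s3 cp" with the
# given source, destination, and extra flags.  The name and signature are taken from
# the calls in this listing; the body is an assumption, not the actual implementation.
import subprocess

class S3Utilities(object):
    @staticmethod
    def CopyItemsAWSCli(source, destination, args=""):
        '''
        Assumed implementation: wrap the AWS CLI "s3 cp" command so callers can
        copy files or folders between local disk and S3 in either direction.
        '''
        command = "aws s3 cp " + source + " " + destination + " " + args
        subprocess.check_call(command, shell=True)  # raises CalledProcessError on failure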
def BulkUploadToS3(self):
    '''
    Uploads the GZIP file created for the file being processed to S3 for later loading.
    '''
    self.logger.info(self.moduleName + " - Uploading GZIP files to s3 folder...")
    fileName = self.processingFile.split(".")[0] + ".csv.gz"
    S3Utilities.CopyItemsAWSCli(self.localTempDirectory + "/" + fileName,
                                "s3://" + self.job["bucketName"] + self.job["s3ToDirectory"],
                                "--quiet")
def UploadToS3(self):
    '''
    Uploads the output and history GZIP files to S3 for later loading.
    '''
    self.logger.info(self.moduleName + " - Uploading GZIP files to s3 folder...")
    fileName = self.job["fileNameOut"] + ".gz"
    fileNameHistory = self.job["fileNameOutHistory"] + ".gz"
    S3Utilities.CopyItemsAWSCli(
        self.localTempDirectory + "/" + fileName,
        's3://' + self.job["bucketName"] + self.job["s3ToDirectory"] + '/' + fileName)
    S3Utilities.CopyItemsAWSCli(
        self.localTempDirectory + "/" + fileNameHistory,
        's3://' + self.job["bucketName"] + self.job["s3ToDirectory"] + '/' + fileNameHistory)
def BulkUploadToS3(self, srcCategory):
    '''
    Uploads all GZIP files created for the given source category to S3 for later loading.
    '''
    self.logger.info(self.moduleName + " - Uploading GZIP files to s3 folder...")
    s3Location = "s3://" + self.job["bucketName"] + self.job["s3GzipFolderBase"] +\
                 "/" + srcCategory["srcCategory"]
    S3Utilities.CopyItemsAWSCli(self.fileUtilities.gzipFolder, s3Location,
                                "--recursive --quiet")
def BulkUploadToS3(self):
    '''
    Uploads all GZIP files created to S3 for later loading.
    '''
    self.logger.info(self.moduleName + " - Uploading GZIP files to s3 folder...")
    S3Utilities.CopyItemsAWSCli(
        self.fileUtilities.gzipFolder,
        "s3://" + self.job["bucketName"] + self.job["s3GzipFolderBase"] + "/Data",
        "--recursive --quiet")
def MoveFolderToS3(self):
    '''
    Copies a local folder of GZIP files to a hard-coded S3 test location.
    '''
    bucketName = "ihs-temp"
    s3GzipFolderBase = "/viu53188"
    s3subfolder = "EHSA"
    s3Location = "s3://" + bucketName + s3GzipFolderBase + "/" +\
                 "test/" + s3subfolder
    localFilepath = "C:\\WorkSpaceEclipse36\\EAA_Dataloader_Data\\input\\Play\\gzip"
    S3Utilities.CopyItemsAWSCli(localFilepath, s3Location, "--recursive --quiet")
def UploadPackedToS3(self):
    '''
    Uploads all files packed to s3.
    '''
    for fp in self.job["foxpro_files"]:
        self.logger.info("Uploading GZIP files to s3 folder...")
        inputFolderPath = self.localTempDirectory + "/packed/" + fp["Name"] + "/"
        S3Utilities.CopyItemsAWSCli(
            inputFolderPath,
            "s3://" + self.job["bucketName"] + self.job["s3ToDirectory"] + fp["Name"] + "/",
            "--recursive --quiet")
def PullData(self):
    '''
    Routine to pull data from s3 to the local instance.
    '''
    sourceLocation = "s3://" + self.job["bucketName"] + self.job["s3DataFolder"]
    try:
        destLocation = self.localTempDataDirectory
        # Note: with "aws s3 cp", an --include filter only takes effect after an
        # --exclude, so as written every file under sourceLocation is copied.
        S3Utilities.CopyItemsAWSCli(sourceLocation, destLocation,
                                    '''--recursive --quiet --include "*.zip"''')
    except Exception:
        self.logger.exception(self.moduleName + " had an issue in pullData for " + sourceLocation)
        raise
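# As noted in PullData above, the AWS CLI includes all files by default, so
# --include on its own does not narrow the copy.  If limiting the transfer to
# zip archives is actually the intent, the extra-arguments string would need to
# exclude everything first (shown here as a sketch, not as a change to the code):
#
#     '''--recursive --quiet --exclude "*" --include "*.zip"'''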
def UploadPackedToS3(self):
    '''
    Uploads all files packed to s3.
    '''
    self.logger.info("Uploading GZIP files to s3 folder...")
    for folder in self.job["folderPath"]:
        if folder == "raw":
            continue
        S3Utilities.CopyItemsAWSCli(
            self.localTempDirectory + self.job["folderPath"][folder],
            "s3://" + self.job["bucketName"] + self.job["s3ToDirectory"][folder],
            "--recursive --quiet")
def BulkDownload(self):
    '''
    Download the entire bucket of EIA 860.
    '''
    for path in self.job["s3SrcDirectory"]:
        try:
            sourcePath = "s3://" + self.job["bucketName"] + "/" + path
            outputPath = self.localTempDirectory + "/"
            S3Utilities.CopyItemsAWSCli(sourcePath, outputPath,
                                        "--recursive --quiet")
        except Exception:
            self.logger.exception(
                "Exception in PGCREIA860.BulkDownload. Location {}".format(sourcePath))
            raise
def DownloadFilesFromS3(self, tablesJson):
    '''
    Download all files from the s3 data folder.
    '''
    try:
        self.logger.debug(self.moduleName + " -- " + "DownloadFilesFromS3" + " starting ")
        rawFolder = self.localTempDirectory + '/raw/'
        S3Utilities.CopyItemsAWSCli("s3://" + tablesJson["srcBucketName"] + tablesJson["srcS3DataFolder"],
                                    rawFolder,
                                    "--recursive --quiet")
        self.logger.debug(self.moduleName + " -- " + "DownloadFilesFromS3" + " finished ")
    except Exception as err:
        self.logger.error(self.moduleName +
                          " - Error while trying to download files from s3. Error: " + str(err))
        raise
def LoadAllData(self):
    '''
    Process:
    1)  push Attribute and data gz files to S3
    2)  load data into Redshift from S3
    '''
    self.CreateFolders("N")  # this just sets the variables we will need
    self.fileUtilities = FileUtilities(self.logger)

    rsConnect = RedshiftUtilities.Connect(dbname=self.awsParams.redshift['Database'],
                                          host=self.awsParams.redshift['Hostname'],
                                          port=self.awsParams.redshift['Port'],
                                          user=self.awsParams.redshiftCredential['Username'],
                                          password=self.awsParams.redshiftCredential['Password'])

    for table in self.job["tables"]:
        ###
        #  first create zip files for all we want to send to S3
        ###
        s3folder = "s3://" + self.job["bucketName"] + self.job["s3GzipFolderBase"]
        if table["type"] == "attributes":
            sourceFolder = self.gzipFolder + "attr"
            destFolder = s3folder + "/attribute"
        else:  # data types
            sourceFolder = self.gzipFolder + "data"
            destFolder = s3folder + "/data"

        S3Utilities.CopyItemsAWSCli(sourceFolder,
                                    destFolder,
                                    '''--recursive --quiet --include "*.gz"''')

        RedshiftUtilities.LoadDataFromS3(rsConnect, self.awsParams.s3,
                                         {
                                             "destinationSchema": self.job["destinationSchema"],
                                             "tableName": table["name"],
                                             "s3Filename": destFolder,
                                             "fileFormat": self.job["fileFormat"],
                                             "dateFormat": self.job["dateFormat"],
                                             "delimiter": self.job["delimiter"]
                                         },
                                         self.logger, "N")
#        S3Utilities.DeleteFileFromS3TempUsingAWSCLi(destFolder,
#                                                    '''--recursive --quiet --include "*.gz"''')
    rsConnect.close()
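# RedshiftUtilities.LoadDataFromS3 is another project helper.  Given the parameters
# passed above (destination schema, table name, S3 prefix, date format, delimiter),
# it presumably composes and runs a Redshift COPY statement on the open connection.
# The sketch below is an assumption about that behavior, not the project's actual
# implementation; the credentials clause is left as a placeholder because the
# structure of awsParams.s3 is unknown, and the trailing "N" flag's meaning is assumed.
class RedshiftUtilities(object):
    @staticmethod
    def LoadDataFromS3(rsConnect, s3Params, loadParams, logger, flag="N"):
        '''
        Assumed implementation: COPY gzipped, delimited files from S3 into Redshift.
        '''
        sql = ("COPY " + loadParams["destinationSchema"] + "." + loadParams["tableName"] +
               " FROM '" + loadParams["s3Filename"] + "'" +
               " CREDENTIALS '<aws credentials derived from s3Params>'" +
               " DELIMITER '" + loadParams["delimiter"] + "'" +
               " DATEFORMAT '" + loadParams["dateFormat"] + "'" +
               " GZIP")
        cursor = rsConnect.cursor()   # rsConnect assumed to be a DB-API connection
        cursor.execute(sql)
        rsConnect.commit()
        cursor.close()
        logger.info("Loaded " + loadParams["tableName"] + " from " + loadParams["s3Filename"])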
def DownloadFilesFromS3(self, tablesJson):
    '''
    Download the source file from the s3 data folder.
    '''
    try:
        self.logger.debug(self.moduleName + " -- " + "DownloadFilesFromS3" +
                          " starting " + tablesJson["srcFile"])
        S3Utilities.CopyItemsAWSCli(
            "s3://" + tablesJson["srcBucketName"] + tablesJson["srcS3DataFolder"] + tablesJson["srcFile"],
            self.fileUtilities.csvFolder,
            "--quiet")
        self.logger.debug(self.moduleName + " -- " + "DownloadFilesFromS3" +
                          " finished " + tablesJson["srcFile"])
    except Exception as err:
        self.logger.error(
            self.moduleName +
            " - Error while trying to download file from s3. Error: " + str(err))
        raise
def LoadClassRefDF(self, spark):
    '''
    Loads the class reference data.
    '''
    xReferencesDF = {}
    for catalog in self.job["catalogs"]:
        if catalog["name"] == "xReferences":
            for xrefTable in catalog["tables"]:
                if self.xRefPulled is False:
                    S3Utilities.CopyItemsAWSCli(
                        "s3://" + self.job["bucketName"] + xrefTable["s3SourceFolder"] + xrefTable["sourceFileName"],
                        self.fileUtilities.csvFolder,
                        "--quiet")
                xReferencesDF[xrefTable["table"]] = SparkUtilities.ReadCSVFile(
                    spark, xrefTable, self.job["delimiter"], False,
                    self.fileUtilities.csvFolder + "/" + xrefTable["sourceFileName"],
                    self.logger)
            self.xRefPulled = True
    return xReferencesDF
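# SparkUtilities.ReadCSVFile is also a project helper.  Judging by the call above,
# it takes the Spark session, a table definition, a delimiter, a header flag, a file
# path, and a logger, and returns a Spark DataFrame.  The sketch below is a guess
# under those assumptions, using the standard PySpark CSV reader; the field names
# inside tableJson ("fields", "name") are hypothetical.
from pyspark.sql.types import StructType, StructField, StringType

class SparkUtilities(object):
    @staticmethod
    def ReadCSVFile(spark, tableJson, delimiter, hasHeader, filePath, logger):
        '''
        Assumed implementation: read a delimited file into a DataFrame, applying a
        schema built from the table's JSON definition (all columns read as strings
        here for simplicity).
        '''
        schema = StructType([StructField(field["name"], StringType(), True)
                             for field in tableJson["fields"]])
        logger.info("Reading CSV file: " + filePath)
        return (spark.read
                .option("delimiter", delimiter)
                .option("header", str(hasHeader).lower())
                .schema(schema)
                .csv(filePath))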