Python S3Utilities.S3Copy Exemples

Langage de programmation: Python

Espace de nommage/Paquet: AACloudTools.S3Utilities

Class/Type: S3Utilities

Méthode/Fonction: S3Copy

Exemples sur hotexamples.com : 3

Python S3Utilities.S3Copy - 3 exemples trouvés. Ce sont les exemples réels les mieux notés de AACloudTools.S3Utilities.S3Utilities.S3Copy extraits de projets open source. Vous pouvez noter les exemples pour nous aider à en améliorer la qualité.

Méthodes fréquemment utilisées

Afficher Cacher

DownloadFileFromS3(17)

CopyItemsAWSCli(14)

DeleteFileFromS3TempUsingAWSCLi(8)

UploadFileToS3Temp(7)

DeleteFile(5)

GetListOfFiles(5)

SyncFolderAWSCli(5)

GetS3FileName(3)

S3Copy(3)

S3RecursvieCopy(3)

KeyExist(2)

GetFilesNModifiedDatetimeFromS3(1)

GetFilesSinceGivenDatetime(1)

UploadFileToS3(1)

Méthodes fréquemment utilisées

DownloadFileFromS3 (17)

CopyItemsAWSCli (14)

DeleteFileFromS3TempUsingAWSCLi (8)

UploadFileToS3Temp (7)

DeleteFile (5)

GetListOfFiles (5)

SyncFolderAWSCli (5)

GetS3FileName (3)

S3Copy (3)

S3RecursvieCopy (3)

Méthodes fréquemment utilisées

KeyExist (2)

GetFilesNModifiedDatetimeFromS3 (1)

GetFilesSinceGivenDatetime (1)

UploadFileToS3 (1)

Exemple #1

0

Afficher le fichier

def UploadScriptsToDesignatedS3Location(localScriptsFilepath, tableSettings):
    '''
    Copy the table's create-table and insert-into SQL scripts to its S3 script location.

    The destination is composed from tableSettings["schemaName"] and
    tableSettings["table"]. The delete helper is invoked on that destination
    (with "--recursive") before anything is copied, and the destination is
    returned to the caller.
    '''
    destination = AthenaUtilities.ComposeAthenaS3ScriptKey(
        tableSettings["schemaName"], tableSettings["table"])
    # NOTE(review): presumably wipes whatever a previous run left at the destination - confirm
    S3Utilities.DeleteFileFromS3TempUsingAWSCLi(destination, "--recursive")

    # Only the scripts we intend to keep for later reuse get uploaded
    createTableScript = FileUtilities.ComposeCreateTableSqlFilename(
        tableSettings, localScriptsFilepath)
    insertIntoScript = AthenaUtilities.ComposeInsertIntoSqlFilename(
        tableSettings, localScriptsFilepath)
    for scriptPath in (createTableScript, insertIntoScript):
        S3Utilities.S3Copy(scriptPath, destination)

    return destination

Exemple #2

0

Afficher le fichier

def ProcessCatalogs(self, dbCommon, catalog):
    '''
    Process a single catalog file.

    Copies the gzipped file identified by dbCommon["s3SrcDirectory"] and
    catalog["s3Filename"] to the local gzip folder, gunzips it into the csv
    folder, reads the sheet catalog["excelSheetName"] with Pandas (skipping
    catalog["skipRows"]), converts the result to a Spark DataFrame using the
    schema of catalog["tables"][0], saves it as parquet and then runs the
    upload / Athena / Redshift steps for that table.
    '''
    sourceKey = dbCommon["s3SrcDirectory"] + "/" + catalog["s3Filename"]
    self.logger.info(self.moduleName + " - Processing file: " + sourceKey)

    # Both working folders are emptied so nothing from the previous run lingers
    FileUtilities.EmptyFolderContents(self.fileUtilities.gzipFolder)
    FileUtilities.EmptyFolderContents(self.fileUtilities.csvFolder)

    baseName = ntpath.basename(sourceKey)
    gzipPath = self.fileUtilities.gzipFolder + "/" + baseName
    S3Utilities.S3Copy(sourceKey, gzipPath)

    # Same base name inside the csv folder, with a trailing ".gz" stripped
    excelPath = re.sub(r'\.gz$', '', self.fileUtilities.csvFolder + "/" + baseName)
    self.fileUtilities.GunzipFile(gzipPath, excelPath)

    # There is no raw Excel reader for Spark, so the file goes through Pandas first
    self.logger.info(self.moduleName + " - Processing Excel file: " + excelPath)
    excelFrame = PandasUtilities.ConvertDateTimeToObject(
        pd.read_excel(excelPath,
                      catalog["excelSheetName"],
                      index_col=None,
                      na_values=['NaN'],
                      skiprows=catalog["skipRows"]))

    spark = SparkUtilities.GetCreateSparkSession(self.logger)
    table = catalog["tables"][0]  # Only the first table of the catalog is processed
    schema = SparkUtilities.BuildSparkSchema(table)
    sparkFrame = SparkUtilities.ConvertNanToNull(
        spark.createDataFrame(excelFrame, schema))
    SparkUtilities.SaveParquet(sparkFrame, self.fileUtilities)

    self.UploadFilesCreateAthenaTablesAndSqlScripts(table, self.fileUtilities.parquet)
    self.LoadDataFromAthenaIntoRedShiftS3Scripts(table)
    self.logger.debug(self.moduleName + " -- " + "ProcessS3File for file: " +
                      sourceKey + " finished.\n\n")

Exemple #3

0

Afficher le fichier

def ProcessTables(self, dbCommon, tables):
    '''
    Process the data file configured for this job.

    Copies the gzipped file named by self.job["s3Filename"] to the local gzip
    folder, gunzips it into the csv folder with a ".csv" suffix, reads the csv
    folder into a Spark DataFrame using self.job["delimiter"], saves it as
    parquet and then runs the upload / Athena / Redshift steps for `tables`.

    dbCommon is accepted but not used by this method.
    '''
    sourceKey = self.job["s3Filename"]
    self.logger.info(self.moduleName + " - Processing file: " + sourceKey)

    baseName = ntpath.basename(sourceKey)
    gzipPath = self.fileUtilities.gzipFolder + "/" + baseName
    S3Utilities.S3Copy(sourceKey, gzipPath)

    # Unzip first rather than reading the gzip directly: Spark is faster with csv
    csvPath = self.fileUtilities.csvFolder + "/" + baseName + ".csv"
    self.fileUtilities.GunzipFile(gzipPath, csvPath)

    spark = SparkUtilities.GetCreateSparkSession(self.logger)
    sparkFrame = SparkUtilities.ReadCSVFile(
        spark,
        tables,
        self.job["delimiter"],
        True,
        self.fileUtilities.csvFolder,
        self.logger)
    SparkUtilities.SaveParquet(sparkFrame, self.fileUtilities)

    self.UploadFilesCreateAthenaTablesAndSqlScripts(tables, self.fileUtilities.parquet)
    self.LoadDataFromAthenaIntoRedShiftS3Scripts(tables)
    self.logger.info(self.moduleName + " -- " + "ProcessTable " + " finished ")