Example 1
    def DownloadScriptsFromDesignatedS3Location(tableSettings,
                                                localScriptsFilepath):
        '''
        Download the script files, typically table creation and upload, from the designated S3 location
        '''
        s3FolderLocation = AthenaUtilities.ComposeAthenaS3ScriptKey(
            tableSettings["schemaName"], tableSettings["table"])
        S3Utilities.S3RecursvieCopy(s3FolderLocation, localScriptsFilepath)
        return s3FolderLocation
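
This function reads only the "schemaName" and "table" keys of tableSettings. A minimal, hypothetical call site (the values and the local path are assumptions; AthenaUtilities and S3Utilities from the surrounding project must be importable):

    # Hypothetical call site; only "schemaName" and "table" are consulted.
    tableSettings = {
        "schemaName": "dw",       # assumed example value
        "table": "daily_prices",  # assumed example value
    }
    s3Location = DownloadScriptsFromDesignatedS3Location(
        tableSettings, "/tmp/scripts/")
    print("Scripts pulled from: " + s3Location)
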
Example 2
    def DownloadScriptsForRedShift(awsParams, tableSettings,
                                   localScriptsFilepath):
        '''
        Download the script files, typically table creation and upload, from the designated S3 location
        '''
        # Need the proper credentials to read from the Athena lake
        old_key, old_secret_key = awsParams.SwitchS3CredentialsToAthena()

        s3FolderLocation = AthenaUtilities.ComposeAthenaS3ScriptKey(
            tableSettings["schemaName"], tableSettings["table"])
        S3Utilities.S3RecursvieCopy(s3FolderLocation, localScriptsFilepath)

        awsParams.SwitchS3CredentialsTo(old_key, old_secret_key)

        return s3FolderLocation
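
Besides the settings dict, this variant depends on awsParams exposing exactly two methods: SwitchS3CredentialsToAthena(), which switches to the Athena lake credentials and returns the previous (key, secret) pair, and SwitchS3CredentialsTo(key, secret), which restores them. A minimal stand-in sketching that contract (the class name and dummy values are assumptions, for illustration only):

    class StubAwsParams(object):
        '''Sketch of the awsParams contract DownloadScriptsForRedShift uses.'''

        def SwitchS3CredentialsToAthena(self):
            # Switch the active S3 credentials to the Athena lake's and
            # return the previous pair so the caller can restore them.
            return ("previous-key", "previous-secret")  # dummy values

        def SwitchS3CredentialsTo(self, key, secret):
            # Restore the previously active credentials.
            pass
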
Example 3
    def UploadDataFilesToDesignatedS3Location(localParquetFilepath,
                                              tableSettings, partitionValue):
        '''
        Upload the data files, typically Parquet files, to the designated S3 location
        '''
        s3FolderLocation = AthenaUtilities.ComposeAthenaS3DataFileKey(
            tableSettings["schemaName"], tableSettings["table"])

        partitionKeyValueFolder = ""
        if AthenaUtilities.IsTablePartitioned(tableSettings):
            partitionKey = AthenaUtilities.GetPartitionKey(tableSettings)
            if not partitionKey:
                raise ValueError(
                    'Partition key cannot be null for partitioned tables.')
            if not partitionValue:
                raise ValueError(
                    'Partition value cannot be null for partitioned tables.')
            partitionKeyValueFolder = partitionKey + "=" + partitionValue + "/"

        s3FolderLocationData = s3FolderLocation + partitionKeyValueFolder

        # Only delete the specific partition being reloaded.
        # There is no safe, simple option for deleting the whole S3 folder: a
        # mistake could wipe the entire data set in the passive lake, so a
        # FULL deletion must be done MANUALLY.
        if tableSettings["new"] == "Y" or ("clearPartition" in tableSettings
                                           and tableSettings["clearPartition"]
                                           == "Y"):
            S3Utilities.DeleteFileFromS3TempUsingAWSCLi(
                s3FolderLocationData, "--recursive")

        # Only copy the *.parquet files
        S3Utilities.S3RecursvieCopy(
            localParquetFilepath, s3FolderLocationData,
            "--exclude \"*\" --include \"*.parquet\" ")

        #=======================================================================
        # For testing purposes - Copy the file to a holding directory
        # import glob
        # import shutil
        # dst = "/s3/" + tableSettings["table"]
        # src = localParquetFilepath + "*.parquet"
        # for fileName in glob.glob(src):
        #     print(fileName)
        #     shutil.move(fileName, dst)
        #=======================================================================

        return s3FolderLocation
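
Tying it together, a hypothetical call for a partitioned table: setting "new" or "clearPartition" to "Y" clears just that partition's S3 folder first, and only *.parquet files under the local path are copied up. All values, paths, and the partition-related settings below are assumptions:

    tableSettings = {
        "schemaName": "dw",        # assumed example value
        "table": "daily_prices",   # assumed example value
        "new": "N",
        "clearPartition": "Y",     # wipe only this partition before copying
        # ...plus whatever partition settings IsTablePartitioned and
        # GetPartitionKey consult (not shown in this snippet)
    }
    # Files land under s3://.../<partitionKey>=2017-06-30/
    s3Location = UploadDataFilesToDesignatedS3Location(
        "/tmp/parquet/", tableSettings, "2017-06-30")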