Example #1
 def UploadPackedToS3(self):
     '''
     Uploads all packed files to S3.
     '''
     self.logger.info("Uploading GZIP files to s3 folder...")
     S3Utilities.CopyItemsAWSCli(self.packedFolder,
                                 "s3://" + self.job["bucketName"] + self.job["s3ToDirectory"],
                                 "--recursive --quiet")
Example #2
    def BulkUploadToS3(self):
        '''
        Uploads the GZIP file created to S3 so it can be loaded later.
        '''
        self.logger.info(self.moduleName + " - Uploading GZIP files to s3 folder...")

        fileName = self.processingFile.split(".")[0] + ".csv.gz"
        S3Utilities.CopyItemsAWSCli(self.localTempDirectory + "/" + fileName,
                                    "s3://" + self.job["bucketName"] + self.job["s3ToDirectory"], "--quiet")
Example #3
    def UploadToS3(self):
        '''
        Uploads the GZIP files created to S3 so they can be loaded later.
        '''
        self.logger.info(self.moduleName +
                         " - Uploading GZIP files to s3 folder...")
        fileName = self.job["fileNameOut"] + ".gz"
        fileNameHistory = self.job["fileNameOutHistory"] + ".gz"

        S3Utilities.CopyItemsAWSCli(
            self.localTempDirectory + "/" + fileName,
            's3://' + self.job["bucketName"] + self.job["s3ToDirectory"] +
            '/' + fileName)

        S3Utilities.CopyItemsAWSCli(
            self.localTempDirectory + "/" + fileNameHistory,
            's3://' + self.job["bucketName"] + self.job["s3ToDirectory"] +
            '/' + fileNameHistory)
Example #4
 def BulkUploadToS3(self, srcCategory):
     '''
     Uploads all GZIP files created to S3 so they can be loaded later.
     '''
     self.logger.info(self.moduleName +
                      " - Uploading GZIP files to s3 folder...")
     s3Location = "s3://" + self.job["bucketName"] + self.job["s3GzipFolderBase"] +\
                  "/" + srcCategory["srcCategory"]
     S3Utilities.CopyItemsAWSCli(self.fileUtilities.gzipFolder, s3Location,
                                 "--recursive --quiet")
Example #5
    def BulkUploadToS3(self):
        '''
        Uploads all GZIP files created to S3 so they can be loaded later.
        '''
        self.logger.info(self.moduleName +
                         " - Uploading GZIP files to s3 folder...")

        S3Utilities.CopyItemsAWSCli(
            self.fileUtilities.gzipFolder, "s3://" + self.job["bucketName"] +
            self.job["s3GzipFolderBase"] + "/Data", "--recursive --quiet")
Example #6
 def MoveFolderToS3(self):
     '''
     Copies a local GZIP folder to a test location in S3.
     '''
     bucketName = "ihs-temp"
     s3GzipFolderBase = "/viu53188"
     s3subfolder = "EHSA"
     s3Location = "s3://" + bucketName + s3GzipFolderBase + "/" +\
                 "test/" + s3subfolder
     localFilepath = "C:\\WorkSpaceEclipse36\\EAA_Dataloader_Data\\input\\Play\\gzip"
     S3Utilities.CopyItemsAWSCli(localFilepath, s3Location,
                                 "--recursive --quiet")
Example #7
 def UploadPackedToS3(self):
     '''
     Uploads all packed files to S3.
     '''
     for fp in self.job["foxpro_files"]:
         self.logger.info("Uploading GZIP files to s3 folder...")
         inputFolderPath = self.localTempDirectory + "/packed/" + fp["Name"] + "/"
         S3Utilities.CopyItemsAWSCli(
             inputFolderPath, "s3://" + self.job["bucketName"] +
             self.job["s3ToDirectory"] + fp["Name"] + "/",
             "--recursive --quiet")
Example #8
 def PullData(self):
     '''
     routine to pull data from s3 to local instance
     '''
     sourceLocation = "s3://" + self.job["bucketName"] + self.job["s3DataFolder"]
     try:
         destLocation = self.localTempDataDirectory
         S3Utilities.CopyItemsAWSCli(sourceLocation,
                                     destLocation,
                                     '''--recursive --quiet --include "*.zip"''')
     except Exception:
         self.logger.exception(self.moduleName + " had an issue in PullData for " + sourceLocation)
         raise
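A note on the filter flags in PullData above: the AWS CLI includes every file by default, and --include only re-includes files that an earlier --exclude filtered out, so --include "*.zip" on its own still copies the entire prefix. A hypothetical variant that actually restricts the copy to ZIP files:

# Hypothetical variant of the call in PullData: an explicit --exclude "*"
# is needed before --include for the filter to take effect.
S3Utilities.CopyItemsAWSCli(sourceLocation, destLocation,
                            '''--recursive --quiet --exclude "*" --include "*.zip"''')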
Example #9
 def UploadPackedToS3(self):
     '''
     Uploads all packed files to S3.
     '''
     self.logger.info("Uploading GZIP files to s3 folder...")
     for folder in [fldr for fldr in self.job["folderPath"] if fldr != "raw"]:
         S3Utilities.CopyItemsAWSCli(
             self.localTempDirectory + self.job["folderPath"][folder],
             "s3://" + self.job["bucketName"] +
             self.job["s3ToDirectory"][folder], "--recursive --quiet")
Example #10
 def BulkDownload(self):
     '''
     Download the entire bucket of EIA 860
     '''
     for path in self.job["s3SrcDirectory"]:
         try:
             sourcePath = "s3://" + self.job["bucketName"] + "/" + path
             outputPath = self.localTempDirectory + "/"
             S3Utilities.CopyItemsAWSCli(sourcePath, outputPath,
                                         "--recursive --quiet")
         except Exception:
             self.logger.exception(
                 "Exception in PGCREIA860.BulkDownload. Location {}".format(
                     sourcePath))
             raise
Example #11
    def DownloadFilesFromS3(self, tablesJson):
        '''
        Download all files from the s3 data folder.
        '''
        try:
            self.logger.debug(self.moduleName + " -- " + "DownloadFilesFromS3" + " starting ")
            rawFolder = self.localTempDirectory + '/raw/'
            S3Utilities.CopyItemsAWSCli("s3://" + tablesJson["srcBucketName"] + tablesJson["srcS3DataFolder"],
                                        rawFolder,
                                        "--recursive --quiet")

            self.logger.debug(self.moduleName + " -- " + "DownloadFilesFromS3" + " finished ")
        except Exception as err:
            self.logger.error(self.moduleName + " - Error while trying to download files from s3. Error: " + str(err))
            raise
Example #12
    def LoadAllData(self):
        '''
        Process:
        1)  push Attribute and data gz files to S3
        2)  load data into Redshift from S3
        '''
        self.CreateFolders("N")  #  this just sets the variable we will need
        self.fileUtilities = FileUtilities(self.logger)

        rsConnect = RedshiftUtilities.Connect(dbname=self.awsParams.redshift['Database'],
                                              host=self.awsParams.redshift['Hostname'],
                                              port=self.awsParams.redshift['Port'],
                                              user=self.awsParams.redshiftCredential['Username'],
                                              password=self.awsParams.redshiftCredential['Password'])

        for table in self.job["tables"]:
            ###
            #  first create zip files for all we want to send to S3
            ###
            s3folder = "s3://" + self.job["bucketName"] + self.job["s3GzipFolderBase"]
            if table["type"] == "attributes":
                sourceFolder = self.gzipFolder + "attr"
                destFolder = s3folder + "/attribute"
            else:  # data types
                sourceFolder = self.gzipFolder + "data"
                destFolder = s3folder + "/data"

            S3Utilities.CopyItemsAWSCli(sourceFolder,
                                        destFolder,
                                        '''--recursive --quiet --include "*.gz"''')

            RedshiftUtilities.LoadDataFromS3(rsConnect, self.awsParams.s3,
                                             {
                                                 "destinationSchema": self.job["destinationSchema"],
                                                 "tableName": table["name"],
                                                 "s3Filename": destFolder,
                                                 "fileFormat": self.job["fileFormat"],
                                                 "dateFormat": self.job["dateFormat"],
                                                 "delimiter": self.job["delimiter"]
                                             },
                                             self.logger, "N")

#            S3Utilities.DeleteFileFromS3TempUsingAWSCLi(destFolder,
#                                                        '''--recursive --quiet --include "*.gz"''')

        rsConnect.close()
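Example #12 is the only snippet that shows the second half of the pipeline: after CopyItemsAWSCli pushes the GZIP files, RedshiftUtilities.LoadDataFromS3 loads them into Redshift. (The --include "*.gz" filter here has the same caveat noted after Example #8's PullData.) Below is a minimal sketch of the COPY statement such a helper plausibly issues, with parameter names taken from the dict above; the function name, signature, and credential handling are assumptions, while GZIP, DELIMITER, and DATEFORMAT are standard Redshift COPY options:

def LoadDataFromS3Sketch(rsConnect, accessKey, secretKey, params):
    # Build a standard Redshift COPY from an S3 prefix of gzipped CSV files.
    copySql = (
        "COPY {schema}.{table} FROM '{s3Path}' "
        "CREDENTIALS 'aws_access_key_id={key};aws_secret_access_key={secret}' "
        "GZIP DELIMITER '{delim}' DATEFORMAT '{dateFormat}'"
    ).format(schema=params["destinationSchema"], table=params["tableName"],
             s3Path=params["s3Filename"], key=accessKey, secret=secretKey,
             delim=params["delimiter"], dateFormat=params["dateFormat"])
    cursor = rsConnect.cursor()
    cursor.execute(copySql)
    rsConnect.commit()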
Example #13
 def DownloadFilesFromS3(self, tablesJson):
     '''
     Download files from the s3 data folder.
     '''
     try:
         self.logger.debug(self.moduleName + " -- " + "DownloadFilesFromS3" +
                           " starting " + tablesJson["srcFile"])
         S3Utilities.CopyItemsAWSCli(
             "s3://" + tablesJson["srcBucketName"] +
             tablesJson["srcS3DataFolder"] + tablesJson["srcFile"],
             self.fileUtilities.csvFolder, "--quiet")
         self.logger.debug(self.moduleName + " -- " + "DownloadFilesFromS3" +
                           " finished " + tablesJson["srcFile"])
     except Exception as err:
         self.logger.error(
             self.moduleName +
             " - Error while trying to download file from s3. Error: " +
             str(err))
         raise
Example #14
    def LoadClassRefDF(self, spark):
        '''
        Loads the class reference data.
        '''
        xReferencesDF = {}

        for catalog in self.job["catalogs"]:
            if catalog["name"] == "xReferences":
                for xrefTable in catalog["tables"]:
                    if self.xRefPulled is False:
                        S3Utilities.CopyItemsAWSCli(
                            "s3://" + self.job["bucketName"] +
                            xrefTable["s3SourceFolder"] +
                            xrefTable["sourceFileName"],
                            self.fileUtilities.csvFolder, "--quiet")

                    xReferencesDF[xrefTable["table"]] = SparkUtilities.ReadCSVFile(
                        spark, xrefTable, self.job["delimiter"], False,
                        self.fileUtilities.csvFolder + "/" + xrefTable["sourceFileName"],
                        self.logger)

        self.xRefPulled = True
        return xReferencesDF
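SparkUtilities.ReadCSVFile is likewise an internal helper. Given how it is called above (a Spark session, the table's JSON config, a delimiter, a header flag, a file path, and a logger), it plausibly wraps spark.read.csv. A hedged sketch under that assumption; the function name and option handling are illustrative:

def ReadCSVFileSketch(spark, tableConfig, delimiter, hasHeader, filePath, logger):
    # Read a delimited text file into a Spark DataFrame. The real helper
    # presumably also applies a schema derived from tableConfig; that part
    # is omitted here because its format is not shown in the examples.
    logger.info("Reading CSV file: " + filePath)
    return (spark.read
            .option("delimiter", delimiter)
            .option("header", str(hasHeader).lower())
            .csv(filePath))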