Esempio n. 1
0
 def DownloadFilesFromS3(self):
     '''
     For every entry in the job's "iso_files", list the S3 objects stored under
     <s3SrcDirectory>/<iso name>/ and download each of them into
     <localTempDirectory>/<iso name>/, keeping the object's base file name.
     '''
     for isoEntry in self.job["iso_files"]:
         isoName = isoEntry["Name"]
         # The configured source directory starts with "/", which is dropped for listing.
         s3Prefix = self.job["s3SrcDirectory"][1:] + isoName + "/"
         remoteKeys = S3Utilities.GetListOfFiles(self.awsParams.s3, self.job["bucketName"], s3Prefix)
         for remoteKey in remoteKeys:
             # The destination folder is (re)created for each key, so it only
             # exists for ISOs that actually have something to download.
             targetFolder = self.localTempDirectory + "/" + isoName + "/"
             FileUtilities.CreateFolder(targetFolder)
             # DownloadFile is handed the key with its leading "/" restored.
             self.DownloadFile("/" + remoteKey, targetFolder + remoteKey.rsplit("/", 1)[-1])
Esempio n. 2
0
 def DownloadFiles(self):
     '''
     Download every file listed under the job's S3 source directory (the XML
     inputs, per the original description) into the local temp directory.

     Returns the list of local paths the files were written to, in listing order.
     '''
     # The configured source directory starts with "/", which is dropped for listing.
     remoteKeys = S3Utilities.GetListOfFiles(self.awsParams.s3, self.job["bucketName"], self.job["s3SrcDirectory"][1:])
     localPaths = []
     for remoteKey in remoteKeys:
         # Files are flattened into the temp directory: only the base name is kept.
         localPath = self.localTempDirectory + "/" + remoteKey.rsplit("/", 1)[-1]
         # The download helper is handed the key with its leading "/" restored.
         S3Utilities.DownloadFileFromS3(self.awsParams.s3, self.job["bucketName"], "/" + remoteKey, localPath)
         localPaths.append(localPath)
     return localPaths
Esempio n. 3
0
 def GetListOfFilesOnS3(self):
     '''
     Return the list of files on S3 under the job's bucket & source directory.

     This method only lists the files; it does not download anything.
     Any exception raised while listing is logged and then re-raised unchanged.
     '''
     try:
         # The configured source directory starts with "/", which is dropped for listing.
         return S3Utilities.GetListOfFiles(self.awsParams.s3,
                                           self.job["bucketName"],
                                           self.job["s3SrcDirectory"][1:])
     except Exception:
         # Both messages are kept as-is, but only the second call attaches the
         # traceback so the same stack trace is not written to the log twice.
         self.logger.error(
             "Exception in PGCRFERCFilings.GetListOfFilesOnS3")
         self.logger.exception(
             "Exception while fetching the list of files from S3 bucket: {}, path:{}"
             .format(self.job["bucketName"],
                     self.job["s3SrcDirectory"][1:]))
         raise
Esempio n. 4
0
 def DownloadFiles(self):
     '''
     Download every file listed under the job's S3 source directory
     (the IHSMarkitData files, per the original description) into the local
     temp directory.

     A failed download is logged together with the offending key and the
     exception is re-raised, which stops the remaining downloads.
     '''
     # The configured source directory starts with "/", which is dropped for listing.
     s3Keys = S3Utilities.GetListOfFiles(
         self.awsParams.s3, self.job["bucketName"], self.job["s3SrcDirectory"][1:])
     for s3Key in s3Keys:
         try:
             # Files are flattened into the temp directory: only the base name is kept.
             baseName = s3Key.rsplit("/", 1)[-1]
             localPath = self.localTempDirectory + "/" + baseName
             S3Utilities.DownloadFileFromS3(
                 self.awsParams.s3, self.job["bucketName"], s3Key, localPath)
         except Exception:
             self.logger.exception("Download Error for file " + s3Key)
             raise
Esempio n. 5
0
 def DownDataFiles(self, dbCommon):
     '''
     Download all the files found under dbCommon["s3SrcDirectory"] and unzip them.

     dbCommon["s3SrcDirectory"] is read as "s3://<bucket>/<directory>": the
     bucket name and the directory are both parsed out of that value. Each
     listed file is downloaded into the local temp directory and then unzipped.

     A failure on any file is logged together with the offending key and the
     exception is re-raised, which stops the remaining files.
     '''
     s3SrcDirectory = dbCommon["s3SrcDirectory"]
     # First path component after the scheme is the bucket name.
     bucketName = s3SrcDirectory.replace("s3://", "").split("/")[0].strip()
     # What is left after removing "s3://<bucket>" is the directory, with a leading "/".
     directory = s3SrcDirectory.replace("s3://" + bucketName, "")
     fileList = S3Utilities.GetListOfFiles(self.awsParams.s3, bucketName, directory[1:])
     for fileName in fileList:
         try:
             # Files are flattened into the temp directory: only the base name is kept.
             inputFileFullPath = self.localTempDirectory + "/" + fileName.split("/")[-1]
             # Download from the same bucket the keys were listed from; the
             # previous code used self.job["bucketName"] here, which targets the
             # wrong bucket whenever it differs from the one in dbCommon.
             S3Utilities.DownloadFileFromS3(self.awsParams.s3, bucketName, fileName, inputFileFullPath)
             # NOTE(review): the unzip folder is gzipFolder + the full local path
             # cut at its first "."; a dot anywhere in localTempDirectory would
             # truncate it early - confirm this is intended.
             unzipFolder = self.fileUtilities.gzipFolder + inputFileFullPath.split('.')[0] + "/"
             self.fileUtilities.UnzipFile(inputFileFullPath, unzipFolder)
         except Exception:
             self.logger.exception("Download Error for file " + fileName)
             raise