Python FileUtilities.LoadSQLQueryの例

プログラミング言語: Python

名前空間/パッケージ名: AACloudTools.FileUtilities

クラス/型: FileUtilities

メソッド/関数: LoadSQLQuery

hotexamples.comのコード掲載数: 2

Python FileUtilities.LoadSQLQuery - 2件のコード例が見つかりました。すべてオープンソースプロジェクトから抽出されたPythonのAACloudTools.FileUtilities.FileUtilities.LoadSQLQueryの実例で、最も評価が高いものを厳選しています。コード例の評価を行っていただくことで、より質の高いコード例が表示されるようになります。

よく使われるメソッド

表示非表示

PathToForwardSlash(30)

RemoveFileIfItExists(19)

CreateFolder(15)

FileUtilities(11)

EmptyFolderContents(10)

RemoveFolder(10)

GzipFile(6)

ScanFolder(6)

CreateLogger(5)

DeleteFile(4)

CreateTableSql(3)

RemoveLines(3)

PutLine(3)

LoadSQLQuery(2)

UnzipUsing7z(2)

ReplaceIterativelyInFile(2)

ComposeCreateTableSqlFilename(2)

CreateFolders(2)

LoadJobConfiguration(1)

CreateActualFileFromTemplate(1)

GetLineCount(1)

GetFileContents(1)

FilesExistInFolder(1)

RemoveHeader(1)

DownloadFromURLUserPassword(1)

DownloadFromURL(1)

ReplaceStringInFile(1)

UnzipFile(1)

CreateFolder2(1)

WriteToFile(1)

コード例 #1

ファイルを表示

ファイル: Vantage.py プロジェクト: eulertech/backup

class Vantage(ApplicationBase):
    '''
    This class is used to get the Vanatage data from IHS Vantage Database, transform it and load it into Redshift.
    '''
    def __init__(self):
        '''
        Initial settings
        '''
        super(Vantage, self).__init__()

        self.awsParams = ""
        self.packedFolder = None
        self.rawFolder = None
        self.fileUtilities = FileUtilities(self.logger)
        self.location = FileUtilities.PathToForwardSlash(
            os.path.dirname(os.path.abspath(__file__)))

    def BulkExtractAll(self):
        '''
        Controls the flow thru the different data sets coming from Vantage DB.
        '''
        try:
            for dsScript in self.job["extractingScripts"]:
                self.logger.info(self.moduleName + " Starts extracting " +
                                 dsScript["tableSuffix"] + " data...")

                self.bcpUtilities.RunBCPJob(
                    self.job["mssqlLoginInfo"],
                    self.job["bcpUtilityDirOnLinux"],
                    self.fileUtilities.LoadSQLQuery(self.location +
                                                    dsScript["scriptFile"]),
                    self.localTempDirectory + "/Raw/" +
                    dsScript["tableSuffix"] + ".CSV", self.job["delimiter"])
        except Exception as err:
            self.logger.error(
                "Error while trying to Bulk Extract all. Message: " +
                err.message)
            raise

    def TransformAndPackAll(self):
        '''
        Compress the csv files created.
        '''
        rawFiles = self.fileUtilities.ScanFolder(self.rawFolder, None, "CSV")

        try:
            for rFile in rawFiles:
                rFileFull = self.rawFolder + "/" + rFile

                self.logger.info(self.moduleName +
                                 " started compressing file: " + rFile)

                self.fileUtilities.GzipFile(
                    rFileFull, self.packedFolder + "/" + rFile + ".GZ")

                self.fileUtilities.RemoveFileIfItExists(rFileFull)
        except Exception as err:
            self.logger.error(self.moduleName +
                              " Error while compressing raw files. Message: " +
                              err.message)
            raise

    def LoadAllFromS3(self):
        '''
        Load all CSVs from the Vantage's S3 bucket into Redshift
        '''
        rsConnect = None

        try:
            s3DataFolder = "s3://" + self.job["bucketName"] + self.job[
                "s3ToDirectory"]

            rsConnect = RedshiftUtilities.Connect(
                dbname=self.awsParams.redshift['Database'],
                host=self.awsParams.redshift['Hostname'],
                port=self.awsParams.redshift['Port'],
                user=self.awsParams.redshiftCredential['Username'],
                password=self.awsParams.redshiftCredential['Password'])

            for dsScript in self.job["extractingScripts"]:
                RedshiftUtilities.LoadDataFromS3(
                    rsConnect, self.awsParams.s3, {
                        "destinationSchema":
                        self.job["destinationSchema"],
                        "tableName":
                        self.job["tableName"] + dsScript["tableSuffix"],
                        "s3Filename":
                        s3DataFolder + "/" + dsScript["tableSuffix"] +
                        ".CSV.GZ",
                        "fileFormat":
                        self.job["fileFormat"],
                        "dateFormat":
                        self.job["dateFormat"],
                        "delimiter":
                        self.job["delimiter"]
                    }, self.logger, "N")

            self.logger.info(self.moduleName + " - Cleaning s3 data folder...")

            S3Utilities.DeleteFileFromS3TempUsingAWSCLi(
                s3DataFolder, "--recursive --quiet")
        except Exception:
            self.logger.error(
                self.moduleName +
                " - Error while trying to save into Redshift from s3 folder.")
            raise
        finally:
            if rsConnect is not None:
                rsConnect.close()

    def BulkUploadToS3(self):
        '''
        Uploads all GZIP files created into S3 to be uploaded later...
        '''
        self.logger.info(self.moduleName +
                         " - Uploading GZIP files to s3 folder...")

        S3Utilities.CopyItemsAWSCli(
            self.packedFolder,
            "s3://" + self.job["bucketName"] + self.job["s3ToDirectory"],
            "--recursive --quiet")

    def Start(self, logger, moduleName, filelocs):
        try:
            ApplicationBase.Start(self, logger, moduleName, filelocs)

            self.packedFolder = self.localTempDirectory + "/Packed"
            self.rawFolder = self.localTempDirectory + "/Raw"

            self.fileUtilities.RemoveFolder(self.packedFolder)
            self.fileUtilities.RemoveFolder(self.rawFolder)

            self.fileUtilities.CreateFolder(self.packedFolder)
            self.fileUtilities.CreateFolder(self.rawFolder)

            self.BulkExtractAll()
            self.TransformAndPackAll()
            self.BulkUploadToS3()
            self.LoadAllFromS3()
        except Exception as err:
            self.logger.exception(moduleName + " - Exception! Error: " +
                                  err.message)
            raise Exception(err.message)

コード例 #2

ファイルを表示

class VantageAthenaSpark(ApplicationBase):
    '''
    This class is used to get the Vantage data from IHS Vantage Database, and load it into Athena.
    '''
    def __init__(self):
        '''
        Initial settings
        '''
        super(VantageAthenaSpark, self).__init__()

        self.fileUtilities = FileUtilities(self.logger)
        self.location = FileUtilities.PathToForwardSlash(
            os.path.dirname(os.path.abspath(__file__)))

    def BulkExtract(self, datasetName, scriptName, common):
        '''
        Bulk extracts the different data sets coming from Vantage DB.
        '''
        try:
            fileName = self.localTempDirectory + "/csv/" + datasetName + ".CSV"

            self.bcpUtilities.RunBCPJob(
                common["mssqlLoginInfo"], self.job["bcpUtilityDirOnLinux"],
                self.fileUtilities.LoadSQLQuery(self.location + scriptName),
                fileName, self.job["delimiter"])

            return fileName
        except Exception as err:
            self.logger.error("Error while trying to Bulk Extract. Message: " +
                              err.message)
            raise

    def ProcessTables(self, dbCommon, tables):
        '''
        Process each Vantage table.
        '''
        try:
            self.logger.debug(self.moduleName + " -- " +
                              "Processing data for table:" + tables["table"])
            fileName = self.BulkExtract(tables["table"], tables["scriptFile"],
                                        dbCommon)

            spark = SparkUtilities.GetCreateSparkSession(self.logger)
            schema = SparkUtilities.BuildSparkSchema(tables)
            df = (spark.read.format("com.databricks.spark.csv").options(
                header='false',
                delimiter=self.job["delimiter"]).schema(schema).load(fileName))

            self.logger.info(self.moduleName + " -- " + "Done reading " +
                             str(df.count()) +
                             " rows.  Now saving as parquet file...")
            SparkUtilities.SaveParquet(df, self.fileUtilities)
            self.UploadFilesCreateAthenaTablesAndSqlScripts(
                tables, self.fileUtilities.parquet)
            self.LoadDataFromAthenaIntoRedShiftLocalScripts(tables)
        except Exception as err:
            self.logger.error(self.moduleName +
                              " - Error while trying to load table. Error: " +
                              err.message)
            raise

    def Start(self, logger, moduleName, filelocs):
        '''
        main routine
        '''
        ApplicationBase.Start(self, logger, moduleName, filelocs)
        # At some point this will be part of Start
        ApplicationBase.ProcessInput(self, logger, moduleName, filelocs)