def CreateSQLFiles(self, proc, dest): ''' Routine to create sql files to use to create tables in RedShift ''' try: for table in proc["tables"]: fname = self.commonParams["sqlFolder"] + "Create_" + table[ "name"] + ".sql" self.logger.info(fname) outfile = open(fname, "w") outLine = "DROP TABLE IF EXISTS {}.{};".format( dest, table["name"]) outLine = FileUtilities.PutLine(outLine, outfile) outLine = "CREATE TABLE {}.{} (".format(dest, table["name"]) outLine = FileUtilities.PutLine(outLine, outfile) ndx = 0 for fld in table["fields"]: if ndx > 0: outLine = ',' ndx = ndx + 1 outLine = outLine + fld["name"] + " " + fld["type"] if fld["type"] == "VARCHAR": outLine = outLine + "(" + fld["size"] + ") ENCODE LZO" outLine = FileUtilities.PutLine(outLine, outfile) outfile.write(");") outfile.close() except: self.logger.exception(self.moduleName + "- we had an error in CreateSQLFiles ") raise
def SqlToLoadDataFromAthena(logger, tableSettings, fileLoc, partitionValue): # pylint: disable=too-many-branches ''' Create SQL Script to load data from Athena into the redshift table ''' fname = AthenaUtilities.ComposeInsertIntoSqlFilename( tableSettings, fileLoc) outfile = open(fname, "w") athenaSchemaName = AthenaUtilities.ComposeAthenaSchemaName( tableSettings["schemaName"]) schemaDotTableName = athenaSchemaName + '.' + tableSettings["table"] try: # Build the INSERT statement outLine = "INSERT INTO " + tableSettings[ "schemaName"] + '.' + tableSettings["table"] + "\n SELECT" # If we have a date field we need to handle as a special case since Athena/Parquet does not support Date if AthenaUtilities.ContainsSpecialFields(tableSettings): outLine = outLine + AthenaUtilities.HandleSpecialFields( tableSettings) else: outLine = outLine + " * " # Perform a straight copy outLine = outLine + " FROM " + schemaDotTableName # Add where clause if a partition value is specified. Should only be the case with paritioned tables if AthenaUtilities.IsTablePartitioned(tableSettings): if partitionValue: # Copy data smartly rather than copying everything partitionKey = AthenaUtilities.GetPartitionKey( tableSettings) outLine = outLine + "\n WHERE " + partitionKey + " = '" + partitionValue + "'" outLine = outLine + ";" outLine = FileUtilities.PutLine(outLine, outfile) except: # pylint: disable=bare-except logger.exception("problem creating table SQL for " + schemaDotTableName) finally: outfile.close() return fname
def SqlToCompareTables(logger, tableSettings, fname, schema1, table1, validation1, schema2, table2, validation2, whereClause, columnsToExclude): ''' Create SQL Script to load data from Athena into the redshift table ''' sqlScript = "SELECT COUNT(*) FROM (" sqlScript = sqlScript + BuildSqlSubStatement( tableSettings, schema1, table1, validation1, whereClause, columnsToExclude) sqlScript = sqlScript + "\nEXCEPT" sqlScript = sqlScript + BuildSqlSubStatement( tableSettings, schema2, table2, validation2, whereClause, columnsToExclude) sqlScript = sqlScript + "\n);" if fname is not None: outfile = open(fname, "w") FileUtilities.PutLine(sqlScript, outfile) outfile.close() return sqlScript