def execute( query, parameters=None, includeColumnNames=False, incTypeCodes=False, formatter=None, conn=None, connFactory=None, autoCommit=True): """Execute a single SQL query / command against the database. If the description attribute is not None, this implies this was a select statement that produced a result set which will be returned by the fetchall() method. If the description is null, then at least return the rowcount affected by the query. This may be -1 or None still if it is a non-row affecting command (e.g. create / drop). If includeColumnNames is true and the query yields a result set, then one row (list) will be added to the beginning which contains the names of each column as extracted from the cursor.description. If incTypeCodes is true and the query yields a result set, a row (list) will be added to the beginning (but after column names if those are included as well), which contains the numerical type codes of each column as extracted from the cursor.description. This method is probably not terribly efficient and should only be used for prototype testing and short command line functions. For retrieving data to send to stdout or some other stream, add the formatter parameter as an instance of a ResultFormatter object to pipe the data through one fetch at a time. In that case, the full results (which are presumably large) will NOT be returned by the method. If the query object is actually a SQLQuery object, then will use the SQLQuery.getParams() as the params, and str(SQLQuery) as the query string. If autoCommit is True, will autoCommit. The function will also autoCommit if an external connection is NOT supplied. """ # Look for an explicitly specified external connection extConn = conn is not None if conn is None: # If no specific connection object provided, look for a connection factory # to produce one if connFactory is not None: conn = connFactory.connection() else: # No connection or factory specified, just fall back on default connection then conn = connection() cur = conn.cursor() if isinstance(query, SQLQuery): if parameters is None: parameters = tuple(query.getParams()) else: parameters = tuple(parameters) query = str(query) elif parameters is None: parameters = () #log.debug(parameterizeQueryString(query,parameters)); returnValue = None try: timer = time.time(); try: cur.execute( query, parameters ) except Exception, err: log.error(err); #log.error(parameterizeQueryString(query,parameters)); if (not extConn) or autoCommit: conn.rollback(); raise; timer = time.time() - timer; log.debug("Query Time: (%1.3f sec)" % timer ); if cur.description != None: returnValue = [] colNames = None; if includeColumnNames: colNames = columnNamesFromCursor(cur); returnValue.append(colNames) if incTypeCodes: typeCodes = typeCodesFromCursor(cur); returnValue.append(typeCodes); if formatter != None: # An output formatter was specified, pipe the data out one row at time if includeColumnNames: formatter.formatTuple(colNames) progress = ProgressDots(); row = cur.fetchone() while row != None: formatter.formatTuple(row) row = cur.fetchone() progress.Update(); log.info("%d Rows Completed",progress.GetCounts()); returnValue = cur.rowcount else: # No formatter specified, just return the entire result set dataTable = list(cur.fetchall()); for i, row in enumerate(dataTable): dataTable[i] = list(row); returnValue.extend(dataTable); else: returnValue = cur.rowcount if (not extConn) or autoCommit: conn.commit()
def updateFromFile( sourceFile, tableName, columnNames=None, nIdCols=1, delim=None, skipErrors=False, connFactory=None ): """Update the database with the contents of a whitespace-delimited text file. Updates the contents of the <tableName> with the data from the <sourceFile>. One line is expected in the <sourceFile> per row in the database, with each item delimited by the <delim> character (specify None for any whitespace). These items will be inserted under the respective order of the given list of <columnNames>. If the columnNames parameter is not provided, assume the first line of the <sourceFile> contains the column names. To know which rows to update, assume the FIRST column listed in <columnNames> is the ID column to identify rows by. In that case, the data value there from the <sourceFile> will not be used to update the row, but will instead be used to identify the row to update the rest of the data by. If more than one column is necessary to identify a row (composite key), indicate how many of the first columns in <columnNames> should be used with <nIdCols>. Note that these key ID values must not be None / null. The query looks for rows where columnname = value, and the = operator always returns false when the value is null. Returns the total number of rows successfully updated. """ if columnNames is None or len(columnNames) < 1: headerLine = sourceFile.readline(); columnNames = headerLine.split(delim); conn = None; if connFactory is not None: conn = connFactory.connection(); else: conn = connection() cur = conn.cursor() nCols = len(columnNames); try: # Prepare the SQL Statement sql = []; sql.append("update"); sql.append( tableName ); sql.append("set"); # Data Columns for i in xrange(nIdCols,nCols): sql.append(columnNames[i]); sql.append("="); sql.append(Env.SQL_PLACEHOLDER); sql.append(","); sql.pop(); # Remove extra comma at end # ID Columns sql.append("where") for i in xrange(nIdCols): sql.append(columnNames[i]); sql.append("="); sql.append(Env.SQL_PLACEHOLDER); sql.append("and"); sql.pop(); # Remove extra comma at end sql = str.join(" ",sql); log.debug(sql) # Loop through file and execute update statement for every line progress = ProgressDots() for iLine, line in enumerate(sourceFile): if not line.startswith(COMMENT_TAG): try: line = line[:-1]; # Strip the newline character params = line.split(delim); # Special handling for null / None string for iParam in xrange(len(params)): if params[iParam] == "" or params[iParam] == NULL_STRING: # Treat blank strings as NULL params[iParam] = None; # Reposition ID columns to end of parameter list idParams = params[:nIdCols]; dataParams = params[nIdCols:]; paramTuple = dataParams; paramTuple.extend( idParams ); paramTuple = tuple(paramTuple); cur.execute(sql, paramTuple); # Need to "auto-commit" after each command, # otherwise a skipped error will rollback # any previous commands as well if skipErrors: conn.commit() progress.Update() except Exception, err: conn.rollback(); # Reset changes and connection state log.critical(sql); log.critical(paramTuple); log.warning("Error Executing in Script: %s", parameterizeQueryString(sql,paramTuple) ); if skipErrors: log.warning(err) else: raise err conn.commit() return progress.GetCounts();
def syncTable(sourceConn, targetConn, syncTableName, rowIDStrSet=None, formatter=None): if formatter is None: idCol = DBUtil.defaultIDColumn(syncTableName) idQuery = "select %s from %s" % (idCol, syncTableName) # Collect all of the IDs known in the target database and store in memory for rapid lookup print("Querying for IDs from Target Database", file=sys.stderr) targetIdTable = DBUtil.execute(idQuery, conn=targetConn) targetIdSet = set() for row in targetIdTable: targetId = row[0] targetIdSet.add(targetId) # Query data out of the source table, but do it by a cursor so we can stream through large data tables print("Querying for Source Data", file=sys.stderr) dataQuery = "select * from %s" % (syncTableName) sourceCursor = sourceConn.cursor() sourceCursor.execute(dataQuery) colNames = DBUtil.columnNamesFromCursor(sourceCursor) targetCursor = targetConn.cursor() insertQuery = None updateQuery = None progress = ProgressDots() row = sourceCursor.fetchone() while row is not None: dataModel = RowItemModel(row, colNames) if rowIDStrSet is None or str(dataModel[idCol]) in rowIDStrSet: if rowIDStrSet is not None: print("Syncing record: %s" % dataModel[idCol], file=sys.stderr) if dataModel[idCol] not in targetIdSet: # Row does not yet exist in target database, need to insert it if insertQuery is None: insertQuery = DBUtil.buildInsertQuery( syncTableName, list(dataModel.keys())) insertParams = list(dataModel.values()) targetCursor.execute(insertQuery, insertParams) else: # Row already exists in target database, just update values if updateQuery is None: updateQuery = DBUtil.buildUpdateQuery( syncTableName, list(dataModel.keys())) updateParams = [] updateParams.extend(list(dataModel.values())) updateParams.append(dataModel[idCol]) targetCursor.execute(updateQuery, updateParams) if progress.GetCounts() % progress.big == 0: targetConn.commit() row = sourceCursor.fetchone() progress.Update() progress.PrintStatus() targetConn.commit() else: ##Do something with thr formatter ##Set up the formatter theFormatter = formatter(syncTableName, targetConn, includeColumnNames=True, autoCommit=True) #Call DB execute res = DBUtil.execute("select * from %s" % syncTableName, includeColumnNames=True, conn=sourceConn, formatter=theFormatter)